# load necessary packages and data
library(tidyr)
library(dplyr)
library(readr)
library(plotly)
library(ggplot2)
library(jgcricolors)
library(Polychrome)
setwd("C:/Users/spei632/Documents/GCAM_industry/food_processing")
fig_dir <- "C:/Users/spei632/Documents/GCAM_industry/food_processing/initial_exploration/figures_assumptions_update"
if(!dir.exists(fig_dir)) {
dir.create(fig_dir)
}
# load data
IEA_en_bal <- read_csv("initial_exploration/data/L100.IEA_en_bal_ctry_hist.csv")
UN_popTot <- read_csv("initial_exploration/data/UN_popTot.csv", skip = 7)
USDA_GDP_MER <- read_csv("initial_exploration/data/USDA_GDP_MER.csv", skip = 6)
WB_ExtraCountries_GDP_MER <- read_csv("initial_exploration/data/WB_ExtraCountries_GDP_MER.csv", skip = 7)
socioeconomics_ctry <- read_csv("initial_exploration/data/socioeconomics_ctry.csv", skip = 7)
macronutrientrate_2010_2019_mean <- read_csv("initial_exploration/data/GCAMDATA_FAOSTAT_MacroNutrientRate_179Regs_426Items_2010to2019Mean.csv", skip = 8)
fao_sua_2010_2019 <- read_csv("initial_exploration/data/GCAMDATA_FAOSTAT_SUA_195Regs_530Items_2010to2019.csv", skip = 8)
fao_fbsh_1973_2009 <- read_csv("initial_exploration/data/GCAMDATA_FAOSTAT_FBSH_CB_173Regs_118Items_1973to2009.csv", skip = 8)
fao_pop <- read_csv("initial_exploration/data/FAOSTAT_data_pop_12-29-2022.csv")
fao_gdp <- read_csv("initial_exploration/data/FAOSTAT_data_gdp_12-29-2022.csv")
L101.CropMeat_Food_Pcal_R_C_Y <- read_csv("initial_exploration/data/L101.CropMeat_Food_Pcal_R_C_Y.csv")
# set constants
CONV_KTOE_EJ <- 0.00004186
hist_years <- c(1971:2015)
gcam_hist_years <- c(1990, 2005, 2010, 2015)
conv_P_k <- 10^12
# mappings
IEA_product_fuel <- read_csv("mappings/IEA_product_fuel.csv", skip = 7)
enduse_fuel_aggregation <- read_csv("mappings/enduse_fuel_aggregation.csv", skip = 5)
IEA_ctry <- read_csv("mappings/iea_ctry.csv", skip = 5)
iso_GCAM_regID <- read_csv("mappings/iso_GCAM_regID.csv", skip = 6)
GCAM_region_names <- read_csv("mappings/GCAM_region_names.csv", skip = 6)
AGLU_ctry <- read_csv("mappings/AGLU_ctry.csv", skip = 7)
Mapping_SUA_PrimaryEquivalent <- read_csv("initial_exploration/data/Mapping_SUA_PrimaryEquivalent.csv", skip = 21)
Mapping_item_FBS_GCAM <- read_csv("initial_exploration/data/Mapping_item_FBS_GCAM.csv", skip = 7)
SUA_item_code_map <- read_csv("initial_exploration/data/SUA_item_code_map.csv", skip = 9)
Area_Region_Map <- read_csv("initial_exploration/data/Area_Region_Map.csv")
GCAM_commodity_type_mapping <- read_csv("mappings/GCAM_commodity_type_mapping.csv")
# colors and palettes
data(palette36)
palette36 <- unname(palette36)
pal_sel <- jgcricol()$pal_all
# assumptions/criteria for filtering data
min_frac_foodpro <- 0.01
min_year <- 1990
max_frac_inonspec <- 0.5
override_frac_foodpro <- 0.1
max_pval <- 0.05
Calculate GDP per capita from the GDP and population data.
# get just historical populations and GDPs
UN_popTot_hist <- UN_popTot %>%
filter(Year <= 2015 & Scenario == "EST") %>%
mutate(iso = tolower(Country))
GDP_MER_hist <- rbind(USDA_GDP_MER, WB_ExtraCountries_GDP_MER) %>%
pivot_longer(cols = -c(Country, iso), names_to = "year", values_to = "GDP_MER_bil2010USD") %>%
mutate(year = as.numeric(year)) %>%
filter(year <= 2015)
# join and calculate GDP per capita
GDP_per_cap_hist_calc <- GDP_MER_hist %>%
left_join(UN_popTot_hist %>% dplyr::select(year = Year, iso, pop_thousands = Value)) %>%
mutate(GDP_per_cap_thous2015USD = (GDP_MER_bil2010USD*10^6*gcamdata::gdp_deflator(2015, 2010)) / (pop_thousands * 1000))
# get from the FAO data (also include population)
fao_gdp_per_cap_hist <- fao_gdp %>%
filter(Element == "Value US$ per capita, 2015 prices" & Year <= 2015) %>%
left_join(fao_pop %>% mutate(pop = Value * 1000) %>% dplyr::select(Area, Year, pop))
# prep FAO data for joining
fao_gdp_per_cap_hist_relabel <- fao_gdp_per_cap_hist %>%
# exclude USSR values in 1990, because these are covered by individual regions
filter(!(Area == "USSR" & Year == 1990)) %>%
# also exclude the "China" GDP since that includes Hong Kong and Macao as well, which are also broken out; there is another one that is "China, mainland" which we keep
filter(Area != "China") %>%
mutate(GDP_pcap_thous2015USD_FAO = Value / 1000) %>%
dplyr::select(area = Area, year = Year, GDP_pcap_thous2015USD_FAO, pop) %>%
# edit a few areas and join to isos and GCAM region IDs
mutate(area = case_when(area == "Côte d'Ivoire" ~ "Cote dIvoire",
area == "Curaçao" ~ "Curacao",
area == "Democratic People's Republic of Korea" ~ "Democratic Peoples Republic of Korea",
area == "Lao People's Democratic Republic" ~ "Lao Peoples Democratic Republic",
area == "Sint Maarten (Dutch part)" ~ "Sint Maarten (Dutch Part)",
area == "Türkiye" ~ "Turkiye",
grepl("Yemen", area) ~ "Yemen",
TRUE ~ area)) %>%
group_by(area, year) %>%
summarize(GDP_pcap_thous2015USD_FAO = weighted.mean(GDP_pcap_thous2015USD_FAO, pop),
pop = sum(pop)) %>%
ungroup() %>%
left_join(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
left_join(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso")
# combine
GDP_per_cap_hist_calc_w_FAO <- GDP_per_cap_hist_calc %>%
dplyr::select(area = Country, iso, year, GDP_pcap_thous2015USD_calc = GDP_per_cap_thous2015USD) %>%
full_join(fao_gdp_per_cap_hist_relabel %>%
rename(area_FAO = area))
First get IEA data for industry (non-specified and specific industries).
# industry flows
flows_sel <- c("MINING", "CONSTRUC", "IRONSTL", "CHEMICAL", "NONFERR", "NONMET", "TRANSEQ", "MACHINE", "FOODPRO", "PAPERPRO", "WOODPRO", "TEXTILES", "INONSPEC")
IEA_ind_sectors_region_fuel <- IEA_en_bal %>%
filter(FLOW %in% flows_sel) %>%
dplyr::select(iso, FLOW, PRODUCT, as.character(hist_years)) %>%
left_join(select(iso_GCAM_regID, iso, GCAM_region_ID, country_name), by = "iso") %>%
left_join(select(IEA_product_fuel, fuel, PRODUCT = product), by = "PRODUCT") %>%
left_join(enduse_fuel_aggregation %>% dplyr::select(fuel, industry)) %>%
# remap biomass_tradbio to biomass
mutate(industry = if_else(fuel == "biomass_tradbio", "biomass", industry)) %>%
pivot_longer(as.character(hist_years), names_to = "year", values_to = "value") %>%
rename(fuel_orig = fuel, fuel = industry) %>%
filter(!is.na(fuel)) %>%
group_by(iso, country_name, GCAM_region_ID, year, fuel, FLOW) %>%
# summarize and convert to EJ
summarize(value = sum(value, na.rm = TRUE) * CONV_KTOE_EJ) %>%
ungroup() %>%
mutate(year = as.numeric(year)) %>%
group_by(iso, country_name, GCAM_region_ID, year, fuel) %>%
mutate(frac_sector = value / sum(value)) %>%
ungroup()
# complete nesting so that there are values for all sectors, fuels, and years, even if 0
all_IEA_years <- unique(IEA_ind_sectors_region_fuel$year)
all_IEA_fuels <- unique(IEA_ind_sectors_region_fuel$fuel)
IEA_ind_sectors_region_fuel <- IEA_ind_sectors_region_fuel %>%
complete(nesting(iso, country_name, GCAM_region_ID),
year = all_IEA_years,
FLOW = flows_sel,
fuel = all_IEA_fuels) %>%
mutate(value = replace_na(value, 0),
frac_sector = replace_na(frac_sector, 0))
# aggregate to GCAM region level
IEA_ind_sectors_GCAM_region_fuel <- IEA_ind_sectors_region_fuel %>%
group_by(GCAM_region_ID, year, fuel, FLOW) %>%
summarize(value = sum(value)) %>%
ungroup() %>%
full_join(GCAM_region_names) %>%
group_by(GCAM_region_ID, year, fuel) %>%
mutate(frac_sector = value / sum(value)) %>%
ungroup()
# get the regions and fuels with all their energy in INONSPEC in all years
country_fuels_all_inonspec <- IEA_ind_sectors_region_fuel %>%
filter(FLOW == "INONSPEC") %>%
group_by(iso, country_name, GCAM_region_ID, fuel) %>%
filter(all(frac_sector == 1)) %>%
dplyr::select(iso, country_name, GCAM_region_ID, fuel) %>%
distinct()
# aggregate to total energy
IEA_ind_sectors_region_total_en <- IEA_ind_sectors_region_fuel %>%
group_by(iso, country_name, GCAM_region_ID, year, FLOW) %>%
summarize(value = sum(value)) %>%
group_by(iso, country_name, GCAM_region_ID, year) %>%
mutate(frac = value / sum(value)) %>%
ungroup()
# aggregate to GCAM region
IEA_ind_sectors_GCAM_region_total_en <- IEA_ind_sectors_region_total_en %>%
group_by(GCAM_region_ID, year, FLOW) %>%
summarize(value = sum(value)) %>%
group_by(GCAM_region_ID, year) %>%
mutate(frac = value / sum(value)) %>%
ungroup()
Get food production.
# from Xin's updated AGLU branch (now merged into master) - processing into GCAM commodities and calories
# 2010-2019 data first
Mapping_SUA_PrimaryEquivalent %>%
select(GCAM_commodity, item = source_item) %>%
bind_rows(Mapping_SUA_PrimaryEquivalent %>%
select(GCAM_commodity, item = sink_item)) %>%
distinct() %>% arrange(GCAM_commodity) ->
SUA_Items_GCAM
# fao_sua_2010_2019 %>% distinct(item) %>%
# filter(!item %in% unique(SUA_Items_GCAM$item)) -> SUA_Items_NonGCAM
SUA_item_code_map %>%
filter(!item %in% unique(SUA_Items_GCAM$item)) -> SUA_Items_NonGCAM
SUA_item_code_map %>%
filter(item_code %in% unique(macronutrientrate_2010_2019_mean$item_code)) %>%
left_join(SUA_Items_GCAM, by = "item") %>%
# For NA GCAM_commodity: not elsewhere classified (NEC)
# So we would know % of food calories not included in GCAM commodities
mutate(GCAM_commodity = if_else(is.na(GCAM_commodity), "NEC", GCAM_commodity)) ->
SUA_Items_Food
# get world average macronutrient rate
macronutrientrate_2010_2019_mean %>% tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc) %>%
group_by(item, item_code, macronutrient) %>%
summarise(macronutrient_value_World = mean(macronutrient_value), .groups = "drop") %>%
ungroup() ->
SUA_food_macronutrient_rate_World
# calculate SUA food Calories consumption by joining macronutrient rates and SUA food - adapted from Xin's code, not using world average for regions with missing data
fao_sua_2010_2019 %>%
filter(element == "Food", item_code %in% SUA_Items_Food$item_code) %>%
# ensure we at least have a complete series across time otherwise it may
# throw off moving avg calculations
complete(area_code = Area_Region_Map$area_code, year = pull(., year) %>% unique(), nesting(item_code, element), fill=list(value=0)) %>%
rename(Food_Kt = value) %>%
select(-element) %>%
gcamdata::left_join_error_no_match(SUA_Items_Food, by = c("item_code")) %>%
gcamdata::repeat_add_columns(
tibble(macronutrient = c("calperg", "fatperc", "proteinperc"))) %>%
left_join(macronutrientrate_2010_2019_mean %>%
tidyr::gather(macronutrient, macronutrient_value, calperg:proteinperc),
by = c("area_code", "item_code", "item", "macronutrient")) %>%
# gcamdata::left_join_error_no_match(SUA_food_macronutrient_rate_World,
# by = c("item_code", "item", "macronutrient")) %>%
# mutate(macronutrient_value = if_else(is.na(macronutrient_value),
# macronutrient_value_World,
# macronutrient_value)) %>%
# calculate total Cal, protein and fat in food
# value was in 1000 ton or 10^ 9 g
mutate(value = macronutrient_value * Food_Kt,
value = if_else(macronutrient %in% c("fatperc", "proteinperc"),
value / 100 /1000, value)) %>% # unit from perc to Mt
#select(-macronutrient_value, -macronutrient_value_World, -Food_Kt) %>%
select(-macronutrient_value) %>%
# rename element with units
mutate(macronutrient = case_when(
macronutrient == "calperg" ~ "MKcal",
macronutrient == "fatperc" ~ "MtFat",
macronutrient == "proteinperc" ~ "MtProtein" )) %>%
gcamdata::left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") ->
FAO_Food_Macronutrient_All_2010_2019
# calculate macronutrient rates from the aggregated values - aggregated to GCAM commodities
macronutrientrate_agg_GCAM_commod_2010_2019 <- FAO_Food_Macronutrient_All_2010_2019 %>%
pivot_wider(names_from = macronutrient, values_from = value) %>%
# in this calculation, we only want to include food that has an associated calorie content (to not have too high of a denominator)
mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, Food_Kt)) %>%
group_by(area_code, area, year, GCAM_commodity, iso, GCAM_region_ID) %>%
summarize(Food_Kt = sum(Food_Kt_w_calorie),
MKcal = sum(MKcal, na.rm = TRUE),
MtFat = sum(MtFat, na.rm = TRUE),
MtProtein = sum(MtProtein, na.rm = TRUE)) %>%
mutate(calperg = MKcal / Food_Kt,
fatperc = MtFat / Food_Kt * 100 * 1000,
proteinperc = MtProtein / Food_Kt * 100 * 1000)
# aggregate to mean of all years
macronutrientrate_agg_GCAM_commod_2010_2019_mean <- macronutrientrate_agg_GCAM_commod_2010_2019 %>%
group_by(area_code, area, GCAM_commodity, iso, GCAM_region_ID) %>%
summarize(Food_Kt_sum = sum(Food_Kt),
MKcal_sum = sum(MKcal),
MtFat_sum = sum(MtFat),
MtProtein_sum = sum(MtProtein),
calperg_mean = mean(calperg, na.rm = TRUE),
fatperc = mean(fatperc, na.rm = TRUE),
proteinperc = mean(proteinperc, na.rm = TRUE),
calperg_mean_weighted = MKcal_sum / Food_Kt_sum) %>%
ungroup()
# aggregate to total calories
FAO_Food_Macronutrient_All_2010_2019_total <- FAO_Food_Macronutrient_All_2010_2019 %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_macronutrient = if_else(is.na(value), 0, Food_Kt)) %>%
group_by(area, year, iso, GCAM_region_ID, macronutrient) %>%
summarize(macronutrient_value = sum(value, na.rm = TRUE),
Food_Kt = sum(Food_Kt),
Food_Kt_w_macronutrient = sum(Food_Kt_w_macronutrient)) %>%
ungroup() %>%
mutate(year = as.numeric(year))
# aggregate to staples, meat, and other non-staples
FAO_Food_Macronutrient_All_2010_2019_total_cats <- FAO_Food_Macronutrient_All_2010_2019 %>%
left_join(GCAM_commodity_type_mapping %>% dplyr::select(GCAM_commodity, commodity_type_broad)) %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_macronutrient = if_else(is.na(value), 0, Food_Kt)) %>%
group_by(area, year, iso, GCAM_region_ID, commodity_type_broad, macronutrient) %>%
summarize(macronutrient_value = sum(value, na.rm = TRUE),
Food_Kt = sum(Food_Kt),
Food_Kt_w_macronutrient = sum(Food_Kt_w_macronutrient)) %>%
ungroup() %>%
mutate(year = as.numeric(year))
# aggregate by GCAM commodity
FAO_Food_Macronutrient_All_2010_2019_commodity <- FAO_Food_Macronutrient_All_2010_2019 %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_macronutrient = if_else(is.na(value), 0, Food_Kt)) %>%
group_by(area, year, iso, GCAM_region_ID, GCAM_commodity, macronutrient) %>%
summarize(macronutrient_value = sum(value, na.rm = TRUE),
Food_Kt = sum(Food_Kt),
Food_Kt_w_macronutrient = sum(Food_Kt_w_macronutrient)) %>%
ungroup() %>%
mutate(year = as.numeric(year))
# data before 2010
fao_fbsh_1973_2009 %>%
gcamdata::gather_years() %>%
filter(year < min(fao_sua_2010_2019$year)) %>%
filter(!is.na(value)) ->
FBSH_CB
Mapping_item_FBS_GCAM %>%
select(item_code, GCAM_commodity)%>%
filter(!is.na(GCAM_commodity)) %>%
left_join(FBSH_CB %>%
# complete element
complete(nesting(area, area_code, item_code, item, year), element,
fill = list(value = 0)),
by = "item_code") %>%
dplyr::group_by_at(vars(-value, -item, -item_code)) %>%
summarise(value = sum(value), .groups = "drop") %>%
gcamdata::left_join_error_no_match(AGLU_ctry %>% select(area = FAO_country, iso), by = "area") %>%
gcamdata::left_join_error_no_match(iso_GCAM_regID %>% select(iso, GCAM_region_ID), by = "iso") %>%
gcamdata::left_join_error_no_match(GCAM_region_names, by = "GCAM_region_ID") %>%
#dplyr::group_by_at(vars(area = region, year, GCAM_commodity, element)) %>%
dplyr::group_by_at(vars(area, region, GCAM_region_ID, year, GCAM_commodity, element, iso, unit)) %>%
summarise(value = sum(value), .groups = "drop") ->
FBSH_CB_reg
# get just food
FBSH_CB_reg_food <- FBSH_CB_reg %>%
filter(element == "Food" & !is.na(unit)) %>%
mutate(unit = "Kt")
# convert to calories using the (weighted) mean of the conversion rate from the 2010-2019 data
FBSH_CB_reg_food_cal <- FBSH_CB_reg_food %>%
left_join(macronutrientrate_agg_GCAM_commod_2010_2019_mean %>%
dplyr::select(area, iso, GCAM_region_ID, GCAM_commodity, calperg = calperg_mean_weighted),
by = c("area", "GCAM_region_ID", "iso", "GCAM_commodity")) %>%
mutate(MKcal = value * calperg)
# some regions have no data for food production for some crops in the 2010-2019 data but have a mean conversion rate for the individual commodities -- take the average of the commodities that constitute the overall GCAM commodity (assuming they contribute equally to the total production of the overall GCAM commodity - big assumption but not sure how else to do it)
# Actually not going to use this for now...they are far off from the real rates
macronutrientrate_2010_2019_mean_by_GCAM_commodity <- macronutrientrate_2010_2019_mean %>%
left_join(SUA_Items_Food) %>%
group_by(area_code, GCAM_commodity) %>%
summarize(calperg_mean = mean(calperg))
# aggregate to total calories
FBSH_CB_reg_food_cal_total <- FBSH_CB_reg_food_cal %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, value)) %>%
group_by(area, year, iso, GCAM_region_ID, region) %>%
summarize(MKcal = sum(MKcal, na.rm = TRUE),
Food_Kt = sum(value),
Food_Kt_w_calorie = sum(Food_Kt_w_calorie)) %>%
ungroup()
# aggregate to staples, meat, and other non-staples
FBSH_CB_reg_food_cal_total_cats <- FBSH_CB_reg_food_cal %>%
left_join(GCAM_commodity_type_mapping %>% dplyr::select(GCAM_commodity, commodity_type_broad)) %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, value)) %>%
group_by(area, year, iso, GCAM_region_ID, region, commodity_type_broad) %>%
summarize(MKcal = sum(MKcal, na.rm = TRUE),
Food_Kt = sum(value),
Food_Kt_w_calorie = sum(Food_Kt_w_calorie)) %>%
ungroup()
# combine the pre-2010 and 2010-2019 data
total_cal_food_kt_all_years <- FAO_Food_Macronutrient_All_2010_2019_total %>%
pivot_wider(names_from = macronutrient, values_from = macronutrient_value) %>%
dplyr::select(area, year, iso, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie = Food_Kt_w_macronutrient, MKcal) %>%
rbind(FBSH_CB_reg_food_cal_total %>%
dplyr::select(area, year, iso, GCAM_region_ID, MKcal, Food_Kt, Food_Kt_w_calorie)) %>%
mutate(frac_Food_Kt_w_calorie = Food_Kt_w_calorie / Food_Kt,
calperg_food_w_calorie = MKcal / Food_Kt_w_calorie) %>%
arrange(area, year)
total_cal_food_kt_all_years_cats <- FAO_Food_Macronutrient_All_2010_2019_total_cats %>%
pivot_wider(names_from = macronutrient, values_from = macronutrient_value) %>%
dplyr::select(area, year, iso, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie = Food_Kt_w_macronutrient, MKcal, commodity_type_broad) %>%
rbind(FBSH_CB_reg_food_cal_total_cats %>%
dplyr::select(area, year, iso, GCAM_region_ID, MKcal, Food_Kt, Food_Kt_w_calorie, commodity_type_broad)) %>%
mutate(frac_Food_Kt_w_calorie = Food_Kt_w_calorie / Food_Kt,
calperg_food_w_calorie = MKcal / Food_Kt_w_calorie) %>%
arrange(area, year, commodity_type_broad)
# combine the GCAM-commodity level data
total_cal_food_kt_all_years_commodity <- FAO_Food_Macronutrient_All_2010_2019_commodity %>%
pivot_wider(names_from = macronutrient, values_from = macronutrient_value) %>%
dplyr::select(area, year, iso, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie = Food_Kt_w_macronutrient, MKcal, GCAM_commodity) %>%
rbind(FBSH_CB_reg_food_cal %>%
# get Food_Kt both including and not including food that has no associated calorie content
mutate(Food_Kt_w_calorie = if_else(is.na(MKcal), 0, value)) %>%
dplyr::select(area, year, iso, GCAM_region_ID, MKcal, Food_Kt = value, Food_Kt_w_calorie, GCAM_commodity)) %>%
mutate(frac_Food_Kt_w_calorie = Food_Kt_w_calorie / Food_Kt,
calperg_food_w_calorie = MKcal / Food_Kt_w_calorie) %>%
arrange(area, year, GCAM_commodity)
# alternate options
# also obtain animal feed in production units
fao_sua_2010_2019 %>%
filter(element == "Feed", item_code %in% SUA_Items_Food$item_code) %>%
rename(Feed_Kt = value) %>%
select(-element) %>%
gcamdata::left_join_error_no_match(SUA_Items_Food, by = c("item_code")) %>%
gcamdata::left_join_error_no_match(Area_Region_Map %>% select(-region), by = "area_code") ->
FAO_Feed_Production_All_2010_2019
# aggregate to total feed production by region
total_feed_kt_2010_2019 <- FAO_Feed_Production_All_2010_2019 %>%
group_by(area_code, area, year, iso, GCAM_region_ID) %>%
summarize(Feed_Kt = sum(Feed_Kt)) %>%
mutate(year = as.numeric(year)) %>%
ungroup()
# get pre-2010 data
FBSH_CB_reg_feed <- FBSH_CB_reg %>%
filter(element == "Feed" & !is.na(unit)) %>%
mutate(unit = "Kt")
total_feed_kt_till_2010 <- FBSH_CB_reg_feed %>%
group_by(area, year, iso, GCAM_region_ID) %>%
summarize(Feed_Kt = sum(value)) %>%
ungroup()
# join
total_feed_kt_all_years <- total_feed_kt_2010_2019 %>%
dplyr::select(area, year, iso, GCAM_region_ID, Feed_Kt) %>%
rbind(total_feed_kt_till_2010 %>%
dplyr::select(area, year, iso, GCAM_region_ID, Feed_Kt)) %>%
arrange(area, year)
Plotting energy use by each of the industry sectors in the IEA data.
# plot energy use by industry sectors
ggplot(IEA_ind_sectors_region_total_en,
aes(x = year, y = value, color = FLOW)) +
geom_line() +
scale_color_manual(values = c("gray20", "red", "firebrick4", "deepskyblue1", "dodgerblue3",
"olivedrab", "darkgreen", "palegreen", "peachpuff2", "pink", "mediumpurple",
"gold1", "orange"), name = "sector") +
facet_wrap(~country_name, scale = "free") +
labs(x = "", y = "EJ", title = "Industry energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_by_sector_country_hist_IEA.png"), width = 35, height = 20)
# plot fractions in food processing and non-specified industry
ggplot(IEA_ind_sectors_region_total_en %>% filter(FLOW %in% c("INONSPEC", "FOODPRO")),
aes(x = year, y = frac, color = FLOW)) +
geom_line() +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "slategray") +
scale_color_manual(values = c("firebrick4", "deepskyblue1"), name = "sector") +
facet_wrap(~country_name, scale = "free_x") +
labs(x = "", y = "Fraction", title = "Fraction of total industry energy use in non-specified industry and food processing (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_frac_inonspec_foodpro_country_hist_IEA.png"), width = 35, height = 20)
# replot at a GCAM region level
ggplot(IEA_ind_sectors_GCAM_region_total_en %>%
left_join(GCAM_region_names),
aes(x = year, y = value, color = FLOW)) +
geom_line() +
scale_color_manual(values = c("gray20", "red", "firebrick4", "deepskyblue1", "dodgerblue3",
"olivedrab", "darkgreen", "palegreen", "peachpuff2", "pink", "mediumpurple",
"gold1", "orange"), name = "sector") +
facet_wrap(~region, scale = "free") +
labs(x = "", y = "EJ", title = "Industry energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_by_sector_GCAM_reg_hist_IEA.png"), width = 35, height = 20)
ggplot(IEA_ind_sectors_GCAM_region_total_en %>% filter(FLOW %in% c("INONSPEC", "FOODPRO")) %>%
left_join(GCAM_region_names),
aes(x = year, y = frac, color = FLOW)) +
geom_line() +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "slategray") +
scale_color_manual(values = c("firebrick4", "deepskyblue1"), name = "sector") +
facet_wrap(~region, scale = "free_x") +
labs(x = "", y = "Fraction", title = "Fraction of total industry energy use in non-specified industry and food processing (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/ind_energy_use_frac_inonspec_foodpro_GCAM_reg_hist_IEA.png"), width = 35, height = 20)
Food processing energy use by fuel in the IEA data.
ggplot(IEA_ind_sectors_region_fuel %>% filter(FLOW == "FOODPRO"),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~country_name, scale = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_country_fuel_hist_IEA_all.png"), width = 35, height = 20)
# also plot all data at GCAM region level
ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "FOODPRO"),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~region, scale = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_IEA_all.png"), width = 35, height = 20)
First join data.
# first join the food processing energy use data with the food production data
IEA_foodpro_region_total_en <- IEA_ind_sectors_region_total_en %>%
filter(FLOW == "FOODPRO")
comb_data_all <- IEA_foodpro_region_total_en %>%
dplyr::select(iso, country_name, GCAM_region_ID, year, energy = value) %>%
full_join(total_cal_food_kt_all_years %>%
dplyr::select(iso, year, GCAM_region_ID, Food_Kt, Food_Kt_w_calorie, MKcal, calperg_food_w_calorie)) %>%
full_join(GDP_per_cap_hist_calc_w_FAO %>%
dplyr::select(iso, year, GCAM_region_ID, GDP_pcap_thous2015USD_calc, GDP_pcap_thous2015USD_FAO, pop)) %>%
left_join(total_feed_kt_all_years %>%
dplyr::select(iso, year, GCAM_region_ID, Feed_Kt)) %>%
filter(year %in% all_IEA_years & !is.na(energy)) %>%
arrange(country_name, year) %>%
# convert some units
mutate(PKcal = MKcal / (10^9))
# select just regions that do not have zero food processing energy use in all years, and also do not have no food production in all years
comb_data_sel <- comb_data_all %>%
group_by(iso) %>%
filter(!all(energy == 0) & !all(is.na(MKcal) | MKcal == 0)) %>%
ungroup()
# repeat for the food data with category of food -- add the food production by category and commodity to the rest of the data
comb_data_all_cats <- comb_data_all %>%
left_join(total_cal_food_kt_all_years_cats %>%
# convert some units
mutate(PKcal = MKcal / (10^9)) %>%
dplyr::select(iso, year, GCAM_region_ID, commodity_type_broad, PKcal) %>%
pivot_wider(names_from = commodity_type_broad, values_from = PKcal)) %>%
mutate(staples_frac = staples / PKcal,
nonstaples_frac = (nonstaples_animal + nonstaples_other) / PKcal,
EJ_per_PKcal = energy / PKcal,
EJ_pcap = energy / pop) %>%
left_join(total_cal_food_kt_all_years_commodity %>%
# convert some units
mutate(PKcal = MKcal / (10^9)) %>%
dplyr::select(iso, year, GCAM_region_ID, GCAM_commodity, PKcal) %>%
pivot_wider(names_from = GCAM_commodity, values_from = PKcal))
# select just regions that do not have zero food processing energy use in all years, and also do not have no food production in all years
comb_data_sel_cats <- comb_data_all_cats %>%
group_by(iso) %>%
filter(!all(energy == 0) & !all(is.na(MKcal) | MKcal == 0)) %>%
ungroup()
Now filter data and calculate relationships. Here I am only using data for regions and years where >1% of total industry energy use is allocated to food processing and <50% of total industry energy use is in non-specified industry, from 1990 onwards – these are set as constants at the beginning of the file.
IEA_ind_sectors_region_total_en_frac_wider <- IEA_ind_sectors_region_total_en %>%
dplyr::select(-value) %>%
pivot_wider(names_from = FLOW, values_from = frac)
# get regions and years to include - only where fraction in food processing is greater than 1% and fraction in non-specified is less than 50% and year is beyond 1990
country_years_incl <- IEA_ind_sectors_region_total_en_frac_wider %>%
# select only desired years in which INONSPEC fraction is not too high and food processing fraction is not too low, or in which food processing fraction is high even if INONSPEC fraction is high
filter(year >= min_year & ((INONSPEC < max_frac_inonspec & (!is.na(FOODPRO) & FOODPRO > min_frac_foodpro)) | FOODPRO > override_frac_foodpro)) %>%
dplyr::select(iso, country_name, GCAM_region_ID, year)
ggplot(IEA_ind_sectors_region_fuel %>% filter(FLOW == "FOODPRO") %>% right_join(country_years_incl),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~country_name, scale = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_country_fuel_hist_IEA_sel.png"), width = 35, height = 20)
# filter the food and energy data
comb_data_sel_cats_final <- comb_data_sel_cats %>%
right_join(country_years_incl) %>%
filter(!is.na(MKcal),
MKcal > 0) %>%
mutate(nonstaples = nonstaples_other + nonstaples_animal)
# print output of linear models
summary(lm(energy ~ 0 + PKcal, comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + PKcal, data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74877 -0.00110 0.00436 0.01927 0.80324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.03354 0.01518 68.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1239 on 1560 degrees of freedom
## Multiple R-squared: 0.7483, Adjusted R-squared: 0.7482
## F-statistic: 4638 on 1 and 1560 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO),
## data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69416 -0.03948 -0.02431 -0.00544 0.76554
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 0.987173 0.015277 64.62 <2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.014346 0.001193 12.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1206 on 1500 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.7703, Adjusted R-squared: 0.77
## F-statistic: 2515 on 2 and 1500 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) + country_name, comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) +
## country_name, data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64277 -0.00718 -0.00079 0.00668 0.34223
##
## Coefficients:
## Estimate Std. Error
## PKcal 2.352491 0.078045
## log(GDP_pcap_thous2015USD_FAO) 0.029186 0.007686
## country_nameAlbania -0.037591 0.016244
## country_nameAlgeria -0.132574 0.024112
## country_nameArgentina -0.079526 0.061511
## country_nameArmenia -0.040913 0.042368
## country_nameAustralia -0.053138 0.030369
## country_nameAustria -0.118643 0.029922
## country_nameAzerbaijan -0.057818 0.019303
## country_nameBelarus -0.034701 0.015178
## country_nameBelgium -0.101420 0.031175
## country_nameBenin -0.010047 0.017847
## country_nameBosnia and Herzegovina -0.049278 0.023208
## country_nameBotswana -0.052505 0.018902
## country_nameBrazil 0.116858 0.020968
## country_nameBulgaria -0.055723 0.016500
## country_nameCameroon -0.053173 0.020695
## country_nameCanada -0.170944 0.032486
## country_nameChina -2.260353 0.111366
## country_nameColombia -0.093006 0.015589
## country_nameCongo -0.034149 0.029986
## country_nameCosta Rica -0.057756 0.019780
## country_nameCote dIvoire -0.051821 0.016980
## country_nameCroatia -0.068637 0.021019
## country_nameCyprus -0.092645 0.026547
## country_nameCzech Republic -0.083026 0.023471
## country_nameDenmark -0.101454 0.031636
## country_nameDominican Republic -0.046362 0.015828
## country_nameEstonia -0.071404 0.022284
## country_nameFinland -0.106581 0.029902
## country_nameFrance -0.143063 0.027272
## country_nameGeorgia -0.036967 0.016080
## country_nameGermany -0.170284 0.027553
## country_nameGreece -0.104488 0.024764
## country_nameHungary -0.074496 0.020556
## country_nameIceland -0.107912 0.031109
## country_nameIreland -0.101293 0.030222
## country_nameIsrael -0.120229 0.031127
## country_nameItaly -0.206296 0.026809
## country_nameJamaica -0.051847 0.042999
## country_nameJapan -0.206964 0.026766
## country_nameKazakhstan -0.080942 0.025214
## country_nameKorea, Republic of -0.159133 0.023689
## country_nameKyrgyzstan -0.014229 0.033690
## country_nameLatvia -0.062774 0.020169
## country_nameLithuania -0.063961 0.020098
## country_nameLuxembourg -0.136048 0.038498
## country_nameMacedonia, the former Yugoslav Republic of -0.040729 0.015360
## country_nameMalta -0.091537 0.033664
## country_nameMexico -0.272799 0.019187
## country_nameMoldova, Republic of -0.012668 0.012579
## country_nameMontenegro -0.053808 0.022999
## country_nameMorocco -0.118164 0.017953
## country_nameMyanmar -0.121165 0.058463
## country_nameNetherlands -0.072896 0.029805
## country_nameNew Zealand -0.093123 0.028681
## country_nameNorway -0.121401 0.033979
## country_namePhilippines -0.159065 0.013421
## country_namePoland -0.099060 0.018530
## country_namePortugal -0.096514 0.024561
## country_nameRomania -0.086500 0.017504
## country_nameRussian Federation -0.104423 0.019674
## country_nameSenegal -0.032372 0.017625
## country_nameSerbia -0.050172 0.022251
## country_nameSlovakia -0.077191 0.021860
## country_nameSlovenia -0.085847 0.024847
## country_nameSpain -0.130528 0.025437
## country_nameSudan -0.093740 0.022481
## country_nameSweden -0.118838 0.030518
## country_nameSwitzerland -0.138804 0.034814
## country_nameTajikistan -0.001590 0.014562
## country_nameThailand 0.011842 0.015108
## country_nameTogo 0.005140 0.017601
## country_nameTunisia -0.065080 0.019961
## country_nameTurkey -0.235759 0.017988
## country_nameUkraine -0.087938 0.017895
## country_nameUnited Kingdom -0.158478 0.028568
## country_nameUnited States of America -0.202710 0.037889
## country_nameUruguay -0.080195 0.039878
## country_nameVenezuela -0.124482 0.021117
## country_nameViet Nam -0.145878 0.024785
## country_nameZimbabwe -0.027843 0.022118
## t value Pr(>|t|)
## PKcal 30.143 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 3.797 0.000152 ***
## country_nameAlbania -2.314 0.020805 *
## country_nameAlgeria -5.498 4.54e-08 ***
## country_nameArgentina -1.293 0.196263
## country_nameArmenia -0.966 0.334376
## country_nameAustralia -1.750 0.080381 .
## country_nameAustria -3.965 7.70e-05 ***
## country_nameAzerbaijan -2.995 0.002790 **
## country_nameBelarus -2.286 0.022384 *
## country_nameBelgium -3.253 0.001168 **
## country_nameBenin -0.563 0.573549
## country_nameBosnia and Herzegovina -2.123 0.033898 *
## country_nameBotswana -2.778 0.005545 **
## country_nameBrazil 5.573 2.99e-08 ***
## country_nameBulgaria -3.377 0.000752 ***
## country_nameCameroon -2.569 0.010288 *
## country_nameCanada -5.262 1.64e-07 ***
## country_nameChina -20.297 < 2e-16 ***
## country_nameColombia -5.966 3.06e-09 ***
## country_nameCongo -1.139 0.254960
## country_nameCosta Rica -2.920 0.003556 **
## country_nameCote dIvoire -3.052 0.002316 **
## country_nameCroatia -3.265 0.001119 **
## country_nameCyprus -3.490 0.000498 ***
## country_nameCzech Republic -3.537 0.000417 ***
## country_nameDenmark -3.207 0.001371 **
## country_nameDominican Republic -2.929 0.003454 **
## country_nameEstonia -3.204 0.001384 **
## country_nameFinland -3.564 0.000377 ***
## country_nameFrance -5.246 1.79e-07 ***
## country_nameGeorgia -2.299 0.021651 *
## country_nameGermany -6.180 8.35e-10 ***
## country_nameGreece -4.219 2.61e-05 ***
## country_nameHungary -3.624 0.000300 ***
## country_nameIceland -3.469 0.000538 ***
## country_nameIreland -3.352 0.000824 ***
## country_nameIsrael -3.863 0.000117 ***
## country_nameItaly -7.695 2.63e-14 ***
## country_nameJamaica -1.206 0.228104
## country_nameJapan -7.732 1.99e-14 ***
## country_nameKazakhstan -3.210 0.001356 **
## country_nameKorea, Republic of -6.718 2.67e-11 ***
## country_nameKyrgyzstan -0.422 0.672842
## country_nameLatvia -3.112 0.001893 **
## country_nameLithuania -3.182 0.001492 **
## country_nameLuxembourg -3.534 0.000423 ***
## country_nameMacedonia, the former Yugoslav Republic of -2.652 0.008100 **
## country_nameMalta -2.719 0.006625 **
## country_nameMexico -14.218 < 2e-16 ***
## country_nameMoldova, Republic of -1.007 0.314090
## country_nameMontenegro -2.340 0.019441 *
## country_nameMorocco -6.582 6.52e-11 ***
## country_nameMyanmar -2.073 0.038399 *
## country_nameNetherlands -2.446 0.014577 *
## country_nameNew Zealand -3.247 0.001194 **
## country_nameNorway -3.573 0.000365 ***
## country_namePhilippines -11.852 < 2e-16 ***
## country_namePoland -5.346 1.05e-07 ***
## country_namePortugal -3.930 8.92e-05 ***
## country_nameRomania -4.942 8.66e-07 ***
## country_nameRussian Federation -5.308 1.29e-07 ***
## country_nameSenegal -1.837 0.066459 .
## country_nameSerbia -2.255 0.024298 *
## country_nameSlovakia -3.531 0.000427 ***
## country_nameSlovenia -3.455 0.000566 ***
## country_nameSpain -5.131 3.27e-07 ***
## country_nameSudan -4.170 3.23e-05 ***
## country_nameSweden -3.894 0.000103 ***
## country_nameSwitzerland -3.987 7.03e-05 ***
## country_nameTajikistan -0.109 0.913097
## country_nameThailand 0.784 0.433274
## country_nameTogo 0.292 0.770306
## country_nameTunisia -3.260 0.001139 **
## country_nameTurkey -13.106 < 2e-16 ***
## country_nameUkraine -4.914 9.95e-07 ***
## country_nameUnited Kingdom -5.547 3.45e-08 ***
## country_nameUnited States of America -5.350 1.02e-07 ***
## country_nameUruguay -2.011 0.044515 *
## country_nameVenezuela -5.895 4.68e-09 ***
## country_nameViet Nam -5.886 4.94e-09 ***
## country_nameZimbabwe -1.259 0.208297
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05834 on 1420 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.9491, Adjusted R-squared: 0.9462
## F-statistic: 323.1 on 82 and 1420 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) + country_name + year, comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) +
## country_name + year, data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63428 -0.00845 -0.00030 0.00819 0.33879
##
## Coefficients:
## Estimate Std. Error
## PKcal 2.4503326 0.0809640
## log(GDP_pcap_thous2015USD_FAO) -0.0161783 0.0131727
## country_nameAlbania -3.2667528 0.7639907
## country_nameAlgeria -3.3589532 0.7635382
## country_nameArgentina -3.2514600 0.7527711
## country_nameArmenia -3.2724781 0.7655480
## country_nameAustralia -3.1542359 0.7341490
## country_nameAustria -3.2235087 0.7350213
## country_nameAzerbaijan -3.2747219 0.7611626
## country_nameBelarus -3.2491976 0.7605010
## country_nameBelgium -3.2147768 0.7370796
## country_nameBenin -3.3028207 0.7790687
## country_nameBosnia and Herzegovina -3.2682287 0.7617542
## country_nameBotswana -3.2515811 0.7569369
## country_nameBrazil -3.0831613 0.7572138
## country_nameBulgaria -3.2540238 0.7566981
## country_nameCameroon -3.3289016 0.7751080
## country_nameCanada -3.2879846 0.7380060
## country_nameChina -5.6265260 0.8038884
## country_nameColombia -3.2985267 0.7583864
## country_nameCongo -3.2800471 0.7683573
## country_nameCosta Rica -3.2313396 0.7509311
## country_nameCote dIvoire -3.3215246 0.7735939
## country_nameCroatia -3.2368299 0.7496898
## country_nameCyprus -3.2218499 0.7406468
## country_nameCzech Republic -3.2368741 0.7463703
## country_nameDenmark -3.1959182 0.7326342
## country_nameDominican Republic -3.2503594 0.7580311
## country_nameEstonia -3.2309487 0.7476812
## country_nameFinland -3.2121509 0.7351872
## country_nameFrance -3.2631877 0.7385264
## country_nameGeorgia -3.2739034 0.7658259
## country_nameGermany -3.2890797 0.7382226
## country_nameGreece -3.2426840 0.7427114
## country_nameHungary -3.2418613 0.7494814
## country_nameIceland -3.2063642 0.7335545
## country_nameIreland -3.2048936 0.7347345
## country_nameIsrael -3.2385693 0.7382554
## country_nameItaly -3.3295334 0.7392455
## country_nameJamaica -3.2688415 0.7621417
## country_nameJapan -3.3331874 0.7399497
## country_nameKazakhstan -3.2664573 0.7539129
## country_nameKorea, Republic of -3.3030681 0.7440336
## country_nameKyrgyzstan -3.2984919 0.7775753
## country_nameLatvia -3.2373115 0.7511670
## country_nameLithuania -3.2389055 0.7512613
## country_nameLuxembourg -3.2030986 0.7264833
## country_nameMacedonia, the former Yugoslav Republic of -3.2541343 0.7602465
## country_nameMalta -3.2366274 0.7446863
## country_nameMexico -3.4574572 0.7535346
## country_nameMoldova, Republic of -3.2736781 0.7714548
## country_nameMontenegro -3.2549727 0.7575427
## country_nameMorocco -3.3627911 0.7676856
## country_nameMyanmar -3.4072648 0.7794579
## country_nameNetherlands -3.1778395 0.7350353
## country_nameNew Zealand -3.2066209 0.7370128
## country_nameNorway -3.2012171 0.7292769
## country_namePhilippines -3.4049236 0.7678855
## country_namePoland -3.2803875 0.7527308
## country_namePortugal -3.2363330 0.7430882
## country_nameRomania -3.2799799 0.7555804
## country_nameRussian Federation -3.3063490 0.7576303
## country_nameSenegal -3.3088570 0.7752118
## country_nameSerbia -3.2582591 0.7591576
## country_nameSlovakia -3.2411025 0.7487013
## country_nameSlovenia -3.2277718 0.7435955
## country_nameSpain -3.2626282 0.7412927
## country_nameSudan -3.3512082 0.7708396
## country_nameSweden -3.2200253 0.7341764
## country_nameSwitzerland -3.2129868 0.7279847
## country_nameTajikistan -3.2958560 0.7793543
## country_nameThailand -3.2010456 0.7601189
## country_nameTogo -3.3050842 0.7831899
## country_nameTunisia -3.2899137 0.7630545
## country_nameTurkey -3.4251394 0.7546222
## country_nameUkraine -3.3365099 0.7686172
## country_nameUnited Kingdom -3.2695923 0.7364452
## country_nameUnited States of America -3.3412601 0.7433419
## country_nameUruguay -3.2418568 0.7489039
## country_nameVenezuela -3.2869759 0.7483450
## country_nameViet Nam -3.4126742 0.7731148
## country_nameZimbabwe -3.3023878 0.7748669
## year 0.0016326 0.0003862
## t value Pr(>|t|)
## PKcal 30.264 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) -1.228 0.22
## country_nameAlbania -4.276 2.03e-05 ***
## country_nameAlgeria -4.399 1.17e-05 ***
## country_nameArgentina -4.319 1.67e-05 ***
## country_nameArmenia -4.275 2.04e-05 ***
## country_nameAustralia -4.296 1.85e-05 ***
## country_nameAustria -4.386 1.24e-05 ***
## country_nameAzerbaijan -4.302 1.81e-05 ***
## country_nameBelarus -4.272 2.06e-05 ***
## country_nameBelgium -4.362 1.38e-05 ***
## country_nameBenin -4.239 2.39e-05 ***
## country_nameBosnia and Herzegovina -4.290 1.90e-05 ***
## country_nameBotswana -4.296 1.86e-05 ***
## country_nameBrazil -4.072 4.92e-05 ***
## country_nameBulgaria -4.300 1.82e-05 ***
## country_nameCameroon -4.295 1.87e-05 ***
## country_nameCanada -4.455 9.04e-06 ***
## country_nameChina -6.999 3.96e-12 ***
## country_nameColombia -4.349 1.46e-05 ***
## country_nameCongo -4.269 2.09e-05 ***
## country_nameCosta Rica -4.303 1.80e-05 ***
## country_nameCote dIvoire -4.294 1.88e-05 ***
## country_nameCroatia -4.318 1.69e-05 ***
## country_nameCyprus -4.350 1.46e-05 ***
## country_nameCzech Republic -4.337 1.55e-05 ***
## country_nameDenmark -4.362 1.38e-05 ***
## country_nameDominican Republic -4.288 1.93e-05 ***
## country_nameEstonia -4.321 1.66e-05 ***
## country_nameFinland -4.369 1.34e-05 ***
## country_nameFrance -4.419 1.07e-05 ***
## country_nameGeorgia -4.275 2.04e-05 ***
## country_nameGermany -4.455 9.03e-06 ***
## country_nameGreece -4.366 1.36e-05 ***
## country_nameHungary -4.325 1.63e-05 ***
## country_nameIceland -4.371 1.33e-05 ***
## country_nameIreland -4.362 1.38e-05 ***
## country_nameIsrael -4.387 1.24e-05 ***
## country_nameItaly -4.504 7.22e-06 ***
## country_nameJamaica -4.289 1.92e-05 ***
## country_nameJapan -4.505 7.19e-06 ***
## country_nameKazakhstan -4.333 1.58e-05 ***
## country_nameKorea, Republic of -4.439 9.72e-06 ***
## country_nameKyrgyzstan -4.242 2.36e-05 ***
## country_nameLatvia -4.310 1.75e-05 ***
## country_nameLithuania -4.311 1.74e-05 ***
## country_nameLuxembourg -4.409 1.12e-05 ***
## country_nameMacedonia, the former Yugoslav Republic of -4.280 1.99e-05 ***
## country_nameMalta -4.346 1.48e-05 ***
## country_nameMexico -4.588 4.86e-06 ***
## country_nameMoldova, Republic of -4.244 2.34e-05 ***
## country_nameMontenegro -4.297 1.85e-05 ***
## country_nameMorocco -4.380 1.27e-05 ***
## country_nameMyanmar -4.371 1.32e-05 ***
## country_nameNetherlands -4.323 1.64e-05 ***
## country_nameNew Zealand -4.351 1.45e-05 ***
## country_nameNorway -4.390 1.22e-05 ***
## country_namePhilippines -4.434 9.96e-06 ***
## country_namePoland -4.358 1.41e-05 ***
## country_namePortugal -4.355 1.42e-05 ***
## country_nameRomania -4.341 1.52e-05 ***
## country_nameRussian Federation -4.364 1.37e-05 ***
## country_nameSenegal -4.268 2.10e-05 ***
## country_nameSerbia -4.292 1.89e-05 ***
## country_nameSlovakia -4.329 1.60e-05 ***
## country_nameSlovenia -4.341 1.52e-05 ***
## country_nameSpain -4.401 1.16e-05 ***
## country_nameSudan -4.347 1.48e-05 ***
## country_nameSweden -4.386 1.24e-05 ***
## country_nameSwitzerland -4.414 1.09e-05 ***
## country_nameTajikistan -4.229 2.50e-05 ***
## country_nameThailand -4.211 2.70e-05 ***
## country_nameTogo -4.220 2.60e-05 ***
## country_nameTunisia -4.312 1.73e-05 ***
## country_nameTurkey -4.539 6.13e-06 ***
## country_nameUkraine -4.341 1.52e-05 ***
## country_nameUnited Kingdom -4.440 9.71e-06 ***
## country_nameUnited States of America -4.495 7.53e-06 ***
## country_nameUruguay -4.329 1.60e-05 ***
## country_nameVenezuela -4.392 1.20e-05 ***
## country_nameViet Nam -4.414 1.09e-05 ***
## country_nameZimbabwe -4.262 2.16e-05 ***
## year 4.228 2.51e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.058 on 1419 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.9498, Adjusted R-squared: 0.9468
## F-statistic: 323.2 on 83 and 1419 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) + country_name + year, comb_data_sel_cats_final %>% mutate(year = factor(year, levels = unique(comb_data_sel_cats_final$year)))))
##
## Call:
## lm(formula = energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) +
## country_name + year, data = comb_data_sel_cats_final %>%
## mutate(year = factor(year, levels = unique(comb_data_sel_cats_final$year))))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63123 -0.00988 0.00143 0.00903 0.32887
##
## Coefficients:
## Estimate Std. Error
## PKcal 2.4888383 0.0828765
## log(GDP_pcap_thous2015USD_FAO) -0.0172616 0.0136035
## country_nameAlbania -0.0032168 0.0194930
## country_nameAlgeria -0.1018113 0.0267651
## country_nameArgentina 0.0128481 0.0660018
## country_nameArmenia -0.0094681 0.0437690
## country_nameAustralia 0.1094901 0.0503850
## country_nameAustria 0.0406021 0.0493926
## country_nameAzerbaijan -0.0123356 0.0234365
## country_nameBelarus 0.0126635 0.0205356
## country_nameBelgium 0.0509967 0.0486085
## country_nameBenin -0.0446891 0.0211238
## country_nameBosnia and Herzegovina -0.0086569 0.0263510
## country_nameBotswana 0.0125569 0.0254353
## country_nameBrazil 0.1713305 0.0270268
## country_nameBulgaria 0.0080190 0.0238679
## country_nameCameroon -0.0707123 0.0224223
## country_nameCanada -0.0258767 0.0486685
## country_nameChina -2.4208521 0.1164805
## country_nameColombia -0.0379098 0.0221107
## country_nameCongo -0.0211668 0.0312022
## country_nameCosta Rica 0.0314666 0.0302434
## country_nameCote dIvoire -0.0615337 0.0188102
## country_nameCroatia 0.0263608 0.0320654
## country_nameCyprus 0.0422805 0.0427208
## country_nameCzech Republic 0.0266153 0.0362355
## country_nameDenmark 0.0686660 0.0524441
## country_nameDominican Republic 0.0115842 0.0225051
## country_nameEstonia 0.0325721 0.0344373
## country_nameFinland 0.0522041 0.0492483
## country_nameFrance -0.0025376 0.0448210
## country_nameGeorgia -0.0108852 0.0186349
## country_nameGermany -0.0288942 0.0452437
## country_nameGreece 0.0204976 0.0398748
## country_nameHungary 0.0207799 0.0318505
## country_nameIceland 0.0583955 0.0513579
## country_nameIreland 0.0595378 0.0498224
## country_nameIsrael 0.0263568 0.0475356
## country_nameItaly -0.0689161 0.0439441
## country_nameJamaica -0.0054702 0.0451299
## country_nameJapan -0.0741219 0.0433464
## country_nameKazakhstan -0.0058820 0.0321227
## country_nameKorea, Republic of -0.0415954 0.0381189
## country_nameKyrgyzstan -0.0391635 0.0350299
## country_nameLatvia 0.0258110 0.0303905
## country_nameLithuania 0.0241584 0.0302718
## country_nameLuxembourg 0.0643816 0.0619769
## country_nameMacedonia, the former Yugoslav Republic of 0.0080784 0.0208222
## country_nameMalta 0.0221633 0.0449119
## country_nameMexico -0.1994444 0.0279677
## country_nameMoldova, Republic of -0.0125996 0.0146988
## country_nameMontenegro 0.0063685 0.0283122
## country_nameMorocco -0.1029404 0.0199494
## country_nameMyanmar -0.1457302 0.0594315
## country_nameNetherlands 0.0859295 0.0493023
## country_nameNew Zealand 0.0575857 0.0469817
## country_nameNorway 0.0637667 0.0567040
## country_namePhilippines -0.1469947 0.0160128
## country_namePoland -0.0195705 0.0280670
## country_namePortugal 0.0269492 0.0394457
## country_nameRomania -0.0182610 0.0253831
## country_nameRussian Federation -0.0500135 0.0257369
## country_nameSenegal -0.0491594 0.0196501
## country_nameSerbia 0.0025910 0.0268443
## country_nameSlovakia 0.0222574 0.0333895
## country_nameSlovenia 0.0361381 0.0393020
## country_nameSpain -0.0006890 0.0414282
## country_nameSudan -0.0911493 0.0238181
## country_nameSweden 0.0442315 0.0504646
## country_nameSwitzerland 0.0519549 0.0583020
## country_nameTajikistan -0.0365447 0.0185327
## country_nameThailand 0.0583905 0.0207006
## country_nameTogo -0.0454896 0.0228391
## country_nameTunisia -0.0290310 0.0231206
## country_nameTurkey -0.1660692 0.0263553
## country_nameUkraine -0.0772443 0.0197426
## country_nameUnited Kingdom -0.0080342 0.0473416
## country_nameUnited States of America -0.0941511 0.0492027
## country_nameUruguay 0.0204639 0.0475859
## country_nameVenezuela -0.0248134 0.0330853
## country_nameViet Nam -0.1601418 0.0261122
## country_nameZimbabwe -0.0450413 0.0236406
## year2000 -0.0023093 0.0114012
## year2001 -0.0008630 0.0114177
## year2002 0.0003181 0.0114518
## year2003 -0.0003288 0.0113229
## year2004 0.0058889 0.0112541
## year2005 0.0073413 0.0111904
## year2006 0.0108946 0.0112970
## year2007 0.0135618 0.0114685
## year2008 0.0131365 0.0115487
## year2009 0.0103825 0.0113168
## year2010 0.0325953 0.0114433
## year2011 0.0313609 0.0115584
## year2012 0.0309708 0.0116003
## year2013 0.0315592 0.0117124
## year2014 0.0278521 0.0118521
## year2015 0.0262748 0.0119593
## year1990 -0.0090665 0.0129534
## year1991 -0.0075482 0.0128352
## year1992 -0.0064985 0.0119896
## year1993 -0.0038313 0.0119342
## year1994 -0.0079139 0.0117684
## year1995 0.0072670 0.0116687
## year1996 0.0043198 0.0115864
## year1997 0.0040912 0.0115665
## year1998 0.0025331 0.0115452
## t value Pr(>|t|)
## PKcal 30.031 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) -1.269 0.204686
## country_nameAlbania -0.165 0.868950
## country_nameAlgeria -3.804 0.000149 ***
## country_nameArgentina 0.195 0.845685
## country_nameArmenia -0.216 0.828770
## country_nameAustralia 2.173 0.029943 *
## country_nameAustria 0.822 0.411202
## country_nameAzerbaijan -0.526 0.598734
## country_nameBelarus 0.617 0.537559
## country_nameBelgium 1.049 0.294299
## country_nameBenin -2.116 0.034558 *
## country_nameBosnia and Herzegovina -0.329 0.742567
## country_nameBotswana 0.494 0.621610
## country_nameBrazil 6.339 3.11e-10 ***
## country_nameBulgaria 0.336 0.736940
## country_nameCameroon -3.154 0.001647 **
## country_nameCanada -0.532 0.595024
## country_nameChina -20.783 < 2e-16 ***
## country_nameColombia -1.715 0.086650 .
## country_nameCongo -0.678 0.497647
## country_nameCosta Rica 1.040 0.298314
## country_nameCote dIvoire -3.271 0.001097 **
## country_nameCroatia 0.822 0.411163
## country_nameCyprus 0.990 0.322495
## country_nameCzech Republic 0.735 0.462761
## country_nameDenmark 1.309 0.190642
## country_nameDominican Republic 0.515 0.606819
## country_nameEstonia 0.946 0.344395
## country_nameFinland 1.060 0.289320
## country_nameFrance -0.057 0.954859
## country_nameGeorgia -0.584 0.559225
## country_nameGermany -0.639 0.523165
## country_nameGreece 0.514 0.607300
## country_nameHungary 0.652 0.514238
## country_nameIceland 1.137 0.255721
## country_nameIreland 1.195 0.232291
## country_nameIsrael 0.554 0.579349
## country_nameItaly -1.568 0.117045
## country_nameJamaica -0.121 0.903542
## country_nameJapan -1.710 0.087490 .
## country_nameKazakhstan -0.183 0.854739
## country_nameKorea, Republic of -1.091 0.275372
## country_nameKyrgyzstan -1.118 0.263758
## country_nameLatvia 0.849 0.395854
## country_nameLithuania 0.798 0.424978
## country_nameLuxembourg 1.039 0.299078
## country_nameMacedonia, the former Yugoslav Republic of 0.388 0.698096
## country_nameMalta 0.493 0.621748
## country_nameMexico -7.131 1.59e-12 ***
## country_nameMoldova, Republic of -0.857 0.391489
## country_nameMontenegro 0.225 0.822059
## country_nameMorocco -5.160 2.82e-07 ***
## country_nameMyanmar -2.452 0.014325 *
## country_nameNetherlands 1.743 0.081570 .
## country_nameNew Zealand 1.226 0.220517
## country_nameNorway 1.125 0.260971
## country_namePhilippines -9.180 < 2e-16 ***
## country_namePoland -0.697 0.485746
## country_namePortugal 0.683 0.494596
## country_nameRomania -0.719 0.472005
## country_nameRussian Federation -1.943 0.052186 .
## country_nameSenegal -2.502 0.012472 *
## country_nameSerbia 0.097 0.923122
## country_nameSlovakia 0.667 0.505139
## country_nameSlovenia 0.919 0.357994
## country_nameSpain -0.017 0.986733
## country_nameSudan -3.827 0.000136 ***
## country_nameSweden 0.876 0.380917
## country_nameSwitzerland 0.891 0.373010
## country_nameTajikistan -1.972 0.048818 *
## country_nameThailand 2.821 0.004860 **
## country_nameTogo -1.992 0.046594 *
## country_nameTunisia -1.256 0.209460
## country_nameTurkey -6.301 3.95e-10 ***
## country_nameUkraine -3.913 9.57e-05 ***
## country_nameUnited Kingdom -0.170 0.865265
## country_nameUnited States of America -1.914 0.055884 .
## country_nameUruguay 0.430 0.667232
## country_nameVenezuela -0.750 0.453392
## country_nameViet Nam -6.133 1.12e-09 ***
## country_nameZimbabwe -1.905 0.056952 .
## year2000 -0.203 0.839514
## year2001 -0.076 0.939758
## year2002 0.028 0.977844
## year2003 -0.029 0.976835
## year2004 0.523 0.600875
## year2005 0.656 0.511909
## year2006 0.964 0.335023
## year2007 1.183 0.237198
## year2008 1.137 0.255530
## year2009 0.917 0.359069
## year2010 2.848 0.004458 **
## year2011 2.713 0.006745 **
## year2012 2.670 0.007677 **
## year2013 2.695 0.007134 **
## year2014 2.350 0.018914 *
## year2015 2.197 0.028183 *
## year1990 -0.700 0.484085
## year1991 -0.588 0.556570
## year1992 -0.542 0.587900
## year1993 -0.321 0.748231
## year1994 -0.672 0.501392
## year1995 0.623 0.533532
## year1996 0.373 0.709327
## year1997 0.354 0.723612
## year1998 0.219 0.826363
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05815 on 1395 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.9503, Adjusted R-squared: 0.9465
## F-statistic: 249.5 on 107 and 1395 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + nonstaples, comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + nonstaples, data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58086 -0.00977 -0.00108 0.00567 0.61242
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.60310 0.02297 113.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08127 on 1560 degrees of freedom
## Multiple R-squared: 0.8917, Adjusted R-squared: 0.8916
## F-statistic: 1.284e+04 on 1 and 1560 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO),
## data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56486 -0.01802 -0.00852 -0.00180 0.60979
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.565471 0.024796 103.461 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.003794 0.000839 4.523 6.59e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08226 on 1500 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.8932, Adjusted R-squared: 0.893
## F-statistic: 6270 on 2 and 1500 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO) + country_name, comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO) +
## country_name, data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65167 -0.00442 -0.00026 0.00430 0.33552
##
## Coefficients:
## Estimate Std. Error
## nonstaples 2.5542374 0.0728038
## log(GDP_pcap_thous2015USD_FAO) 0.0128560 0.0072962
## country_nameAlbania -0.0166793 0.0152961
## country_nameAlgeria -0.0558582 0.0227602
## country_nameArgentina 0.0046493 0.0578369
## country_nameArmenia -0.0173925 0.0397439
## country_nameAustralia 0.0214200 0.0289305
## country_nameAustria -0.0534695 0.0284247
## country_nameAzerbaijan -0.0181387 0.0182700
## country_nameBelarus -0.0039858 0.0143683
## country_nameBelgium -0.0318133 0.0296278
## country_nameBenin -0.0017668 0.0166941
## country_nameBosnia and Herzegovina -0.0215416 0.0218393
## country_nameBotswana -0.0231070 0.0178390
## country_nameBrazil 0.3336491 0.0171860
## country_nameBulgaria -0.0205480 0.0156510
## country_nameCameroon -0.0249473 0.0193738
## country_nameCanada -0.0808413 0.0309763
## country_nameChina -0.1617539 0.0373526
## country_nameColombia -0.0312452 0.0148398
## country_nameCongo -0.0138425 0.0281389
## country_nameCosta Rica -0.0194101 0.0187388
## country_nameCote dIvoire -0.0125155 0.0159041
## country_nameCroatia -0.0273278 0.0199185
## country_nameCyprus -0.0411528 0.0251494
## country_nameCzech Republic -0.0307625 0.0222999
## country_nameDenmark -0.0339678 0.0300383
## country_nameDominican Republic -0.0161034 0.0149824
## country_nameEstonia -0.0300345 0.0210922
## country_nameFinland -0.0437311 0.0283819
## country_nameFrance -0.0359428 0.0260457
## country_nameGeorgia -0.0154950 0.0151404
## country_nameGermany -0.0534961 0.0262842
## country_nameGreece -0.0450263 0.0235751
## country_nameHungary -0.0293105 0.0195227
## country_nameIceland -0.0462600 0.0294839
## country_nameIreland -0.0372712 0.0286907
## country_nameIsrael -0.0563993 0.0295241
## country_nameItaly -0.0836523 0.0257111
## country_nameJamaica -0.0235030 0.0403508
## country_nameJapan -0.0030511 0.0257549
## country_nameKazakhstan -0.0294927 0.0238756
## country_nameKorea, Republic of -0.0420381 0.0229441
## country_nameKyrgyzstan -0.0056270 0.0315756
## country_nameLatvia -0.0259118 0.0190854
## country_nameLithuania -0.0252586 0.0190373
## country_nameLuxembourg -0.0599220 0.0364846
## country_nameMacedonia, the former Yugoslav Republic of -0.0177380 0.0144889
## country_nameMalta -0.0405656 0.0317584
## country_nameMexico -0.1047590 0.0178036
## country_nameMoldova, Republic of -0.0026796 0.0118018
## country_nameMontenegro -0.0239416 0.0216599
## country_nameMorocco -0.0438731 0.0169064
## country_nameMyanmar -0.0612133 0.0547100
## country_nameNetherlands -0.0042222 0.0283488
## country_nameNew Zealand -0.0340054 0.0272097
## country_nameNorway -0.0490762 0.0322619
## country_namePhilippines -0.0178592 0.0117044
## country_namePoland -0.0240899 0.0177101
## country_namePortugal -0.0387763 0.0233723
## country_nameRomania -0.0254073 0.0167495
## country_nameRussian Federation 0.1081884 0.0169914
## country_nameSenegal -0.0122882 0.0165114
## country_nameSerbia -0.0139690 0.0210023
## country_nameSlovakia -0.0331376 0.0207265
## country_nameSlovenia -0.0372763 0.0235434
## country_nameSpain -0.0461902 0.0243461
## country_nameSudan -0.0439849 0.0210708
## country_nameSweden -0.0520881 0.0289946
## country_nameSwitzerland -0.0632993 0.0330706
## country_nameTajikistan 0.0005776 0.0136251
## country_nameThailand 0.1289773 0.0143007
## country_nameTogo 0.0043057 0.0164737
## country_nameTunisia -0.0279479 0.0188484
## country_nameTurkey -0.0857180 0.0170876
## country_nameUkraine -0.0198358 0.0166997
## country_nameUnited Kingdom -0.0467951 0.0274221
## country_nameUnited States of America 0.0935583 0.0301240
## country_nameUruguay -0.0312270 0.0375382
## country_nameVenezuela -0.0585624 0.0202096
## country_nameViet Nam -0.0099869 0.0226960
## country_nameZimbabwe -0.0108141 0.0207358
## t value Pr(>|t|)
## nonstaples 35.084 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 1.762 0.07828 .
## country_nameAlbania -1.090 0.27571
## country_nameAlgeria -2.454 0.01424 *
## country_nameArgentina 0.080 0.93594
## country_nameArmenia -0.438 0.66173
## country_nameAustralia 0.740 0.45918
## country_nameAustria -1.881 0.06016 .
## country_nameAzerbaijan -0.993 0.32097
## country_nameBelarus -0.277 0.78151
## country_nameBelgium -1.074 0.28311
## country_nameBenin -0.106 0.91573
## country_nameBosnia and Herzegovina -0.986 0.32412
## country_nameBotswana -1.295 0.19542
## country_nameBrazil 19.414 < 2e-16 ***
## country_nameBulgaria -1.313 0.18943
## country_nameCameroon -1.288 0.19807
## country_nameCanada -2.610 0.00916 **
## country_nameChina -4.330 1.59e-05 ***
## country_nameColombia -2.105 0.03542 *
## country_nameCongo -0.492 0.62284
## country_nameCosta Rica -1.036 0.30046
## country_nameCote dIvoire -0.787 0.43145
## country_nameCroatia -1.372 0.17029
## country_nameCyprus -1.636 0.10199
## country_nameCzech Republic -1.379 0.16796
## country_nameDenmark -1.131 0.25832
## country_nameDominican Republic -1.075 0.28264
## country_nameEstonia -1.424 0.15468
## country_nameFinland -1.541 0.12359
## country_nameFrance -1.380 0.16781
## country_nameGeorgia -1.023 0.30628
## country_nameGermany -2.035 0.04201 *
## country_nameGreece -1.910 0.05635 .
## country_nameHungary -1.501 0.13349
## country_nameIceland -1.569 0.11687
## country_nameIreland -1.299 0.19413
## country_nameIsrael -1.910 0.05630 .
## country_nameItaly -3.254 0.00117 **
## country_nameJamaica -0.582 0.56035
## country_nameJapan -0.118 0.90571
## country_nameKazakhstan -1.235 0.21694
## country_nameKorea, Republic of -1.832 0.06713 .
## country_nameKyrgyzstan -0.178 0.85859
## country_nameLatvia -1.358 0.17478
## country_nameLithuania -1.327 0.18479
## country_nameLuxembourg -1.642 0.10073
## country_nameMacedonia, the former Yugoslav Republic of -1.224 0.22106
## country_nameMalta -1.277 0.20170
## country_nameMexico -5.884 4.98e-09 ***
## country_nameMoldova, Republic of -0.227 0.82042
## country_nameMontenegro -1.105 0.26920
## country_nameMorocco -2.595 0.00955 **
## country_nameMyanmar -1.119 0.26339
## country_nameNetherlands -0.149 0.88163
## country_nameNew Zealand -1.250 0.21160
## country_nameNorway -1.521 0.12844
## country_namePhilippines -1.526 0.12727
## country_namePoland -1.360 0.17397
## country_namePortugal -1.659 0.09732 .
## country_nameRomania -1.517 0.12951
## country_nameRussian Federation 6.367 2.59e-10 ***
## country_nameSenegal -0.744 0.45686
## country_nameSerbia -0.665 0.50608
## country_nameSlovakia -1.599 0.11009
## country_nameSlovenia -1.583 0.11358
## country_nameSpain -1.897 0.05800 .
## country_nameSudan -2.087 0.03702 *
## country_nameSweden -1.796 0.07263 .
## country_nameSwitzerland -1.914 0.05581 .
## country_nameTajikistan 0.042 0.96619
## country_nameThailand 9.019 < 2e-16 ***
## country_nameTogo 0.261 0.79385
## country_nameTunisia -1.483 0.13836
## country_nameTurkey -5.016 5.93e-07 ***
## country_nameUkraine -1.188 0.23511
## country_nameUnited Kingdom -1.706 0.08814 .
## country_nameUnited States of America 3.106 0.00194 **
## country_nameUruguay -0.832 0.40562
## country_nameVenezuela -2.898 0.00382 **
## country_nameViet Nam -0.440 0.65998
## country_nameZimbabwe -0.522 0.60209
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05468 on 1420 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.9553, Adjusted R-squared: 0.9527
## F-statistic: 370.2 on 82 and 1420 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO) + country_name + year, comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO) +
## country_name + year, data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63980 -0.00731 0.00005 0.00729 0.33015
##
## Coefficients:
## Estimate Std. Error
## nonstaples 2.7110895 0.0755921
## log(GDP_pcap_thous2015USD_FAO) -0.0544931 0.0125015
## country_nameAlbania -4.7337397 0.7164597
## country_nameAlgeria -4.7654709 0.7155217
## country_nameArgentina -4.6248769 0.7053152
## country_nameArmenia -4.7377907 0.7178770
## country_nameAustralia -4.5050914 0.6879565
## country_nameAustria -4.5860496 0.6888568
## country_nameAzerbaijan -4.7161247 0.7136317
## country_nameBelarus -4.6990147 0.7130961
## country_nameBelgium -4.5765340 0.6907480
## country_nameBenin -4.8126436 0.7307326
## country_nameBosnia and Herzegovina -4.7232422 0.7142929
## country_nameBotswana -4.6956434 0.7097577
## country_nameBrazil -4.3290635 0.7082507
## country_nameBulgaria -4.6916156 0.7094846
## country_nameCameroon -4.8096787 0.7268279
## country_nameCanada -4.6297312 0.6914379
## country_nameChina -4.9536077 0.7285890
## country_nameColombia -4.7112870 0.7108301
## country_nameCongo -4.7554032 0.7205553
## country_nameCosta Rica -4.6541263 0.7040391
## country_nameCote dIvoire -4.7877585 0.7253057
## country_nameCroatia -4.6539763 0.7028459
## country_nameCyprus -4.6101515 0.6942599
## country_nameCzech Republic -4.6357699 0.6996307
## country_nameDenmark -4.5511851 0.6865926
## country_nameDominican Republic -4.6958032 0.7107811
## country_nameEstonia -4.6440269 0.7009581
## country_nameFinland -4.5774752 0.6890318
## country_nameFrance -4.5883811 0.6917790
## country_nameGeorgia -4.7438927 0.7181777
## country_nameGermany -4.6034163 0.6914056
## country_nameGreece -4.6267095 0.6961314
## country_nameHungary -4.6545243 0.7026173
## country_nameIceland -4.5696582 0.6875070
## country_nameIreland -4.5680607 0.6885959
## country_nameIsrael -4.6087587 0.6919026
## country_nameItaly -4.6396886 0.6923128
## country_nameJamaica -4.7222947 0.7146340
## country_nameJapan -4.5585190 0.6922281
## country_nameKazakhstan -4.6808591 0.7067170
## country_nameKorea, Republic of -4.6286353 0.6968567
## country_nameKyrgyzstan -4.8040181 0.7293155
## country_nameLatvia -4.6621098 0.7042730
## country_nameLithuania -4.6619409 0.7043453
## country_nameLuxembourg -4.5364949 0.6807323
## country_nameMacedonia, the former Yugoslav Republic of -4.7116331 0.7129263
## country_nameMalta -4.6328077 0.6980489
## country_nameMexico -4.7478790 0.7052912
## country_nameMoldova, Republic of -4.7669967 0.7235706
## country_nameMontenegro -4.6994937 0.7103187
## country_nameMorocco -4.7803206 0.7194380
## country_nameMyanmar -4.8592000 0.7305819
## country_nameNetherlands -4.5367152 0.6888406
## country_nameNew Zealand -4.5795707 0.6907796
## country_nameNorway -4.5445722 0.6833957
## country_namePhilippines -4.7520828 0.7189999
## country_namePoland -4.6679514 0.7054015
## country_namePortugal -4.6229328 0.6965001
## country_nameRomania -4.6878577 0.7082007
## country_nameRussian Federation -4.5575097 0.7086994
## country_nameSenegal -4.7986161 0.7270017
## country_nameSerbia -4.6992699 0.7117792
## country_nameSlovakia -4.6533571 0.7018927
## country_nameSlovenia -4.6250579 0.6970559
## country_nameSpain -4.6174825 0.6945802
## country_nameSudan -4.8007080 0.7226224
## country_nameSweden -4.5791903 0.6880488
## country_nameSwitzerland -4.5503669 0.6821548
## country_nameTajikistan -4.8128568 0.7310591
## country_nameThailand -4.5584758 0.7119446
## country_nameTogo -4.8326433 0.7346859
## country_nameTunisia -4.7376904 0.7154310
## country_nameTurkey -4.7368063 0.7064837
## country_nameUkraine -4.7624520 0.7203698
## country_nameUnited Kingdom -4.5857498 0.6897846
## country_nameUnited States of America -4.4747085 0.6943411
## country_nameUruguay -4.6478301 0.7020215
## country_nameVenezuela -4.6754008 0.7013648
## country_nameViet Nam -4.7751387 0.7239495
## country_nameZimbabwe -4.7944841 0.7267033
## year 0.0023855 0.0003622
## t value Pr(>|t|)
## nonstaples 35.865 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) -4.359 1.40e-05 ***
## country_nameAlbania -6.607 5.53e-11 ***
## country_nameAlgeria -6.660 3.90e-11 ***
## country_nameArgentina -6.557 7.66e-11 ***
## country_nameArmenia -6.600 5.80e-11 ***
## country_nameAustralia -6.549 8.10e-11 ***
## country_nameAustria -6.657 3.97e-11 ***
## country_nameAzerbaijan -6.609 5.47e-11 ***
## country_nameBelarus -6.590 6.20e-11 ***
## country_nameBelgium -6.625 4.90e-11 ***
## country_nameBenin -6.586 6.34e-11 ***
## country_nameBosnia and Herzegovina -6.612 5.34e-11 ***
## country_nameBotswana -6.616 5.22e-11 ***
## country_nameBrazil -6.112 1.27e-09 ***
## country_nameBulgaria -6.613 5.33e-11 ***
## country_nameCameroon -6.617 5.17e-11 ***
## country_nameCanada -6.696 3.08e-11 ***
## country_nameChina -6.799 1.55e-11 ***
## country_nameColombia -6.628 4.82e-11 ***
## country_nameCongo -6.600 5.81e-11 ***
## country_nameCosta Rica -6.611 5.40e-11 ***
## country_nameCote dIvoire -6.601 5.75e-11 ***
## country_nameCroatia -6.622 5.03e-11 ***
## country_nameCyprus -6.640 4.44e-11 ***
## country_nameCzech Republic -6.626 4.88e-11 ***
## country_nameDenmark -6.629 4.80e-11 ***
## country_nameDominican Republic -6.607 5.55e-11 ***
## country_nameEstonia -6.625 4.91e-11 ***
## country_nameFinland -6.643 4.36e-11 ***
## country_nameFrance -6.633 4.67e-11 ***
## country_nameGeorgia -6.605 5.59e-11 ***
## country_nameGermany -6.658 3.95e-11 ***
## country_nameGreece -6.646 4.27e-11 ***
## country_nameHungary -6.625 4.93e-11 ***
## country_nameIceland -6.647 4.26e-11 ***
## country_nameIreland -6.634 4.64e-11 ***
## country_nameIsrael -6.661 3.88e-11 ***
## country_nameItaly -6.702 2.96e-11 ***
## country_nameJamaica -6.608 5.50e-11 ***
## country_nameJapan -6.585 6.38e-11 ***
## country_nameKazakhstan -6.623 4.97e-11 ***
## country_nameKorea, Republic of -6.642 4.39e-11 ***
## country_nameKyrgyzstan -6.587 6.30e-11 ***
## country_nameLatvia -6.620 5.09e-11 ***
## country_nameLithuania -6.619 5.12e-11 ***
## country_nameLuxembourg -6.664 3.80e-11 ***
## country_nameMacedonia, the former Yugoslav Republic of -6.609 5.46e-11 ***
## country_nameMalta -6.637 4.55e-11 ***
## country_nameMexico -6.732 2.43e-11 ***
## country_nameMoldova, Republic of -6.588 6.26e-11 ***
## country_nameMontenegro -6.616 5.21e-11 ***
## country_nameMorocco -6.645 4.32e-11 ***
## country_nameMyanmar -6.651 4.14e-11 ***
## country_nameNetherlands -6.586 6.35e-11 ***
## country_nameNew Zealand -6.630 4.77e-11 ***
## country_nameNorway -6.650 4.17e-11 ***
## country_namePhilippines -6.609 5.45e-11 ***
## country_namePoland -6.617 5.17e-11 ***
## country_namePortugal -6.637 4.53e-11 ***
## country_nameRomania -6.619 5.10e-11 ***
## country_nameRussian Federation -6.431 1.73e-10 ***
## country_nameSenegal -6.601 5.77e-11 ***
## country_nameSerbia -6.602 5.71e-11 ***
## country_nameSlovakia -6.630 4.77e-11 ***
## country_nameSlovenia -6.635 4.60e-11 ***
## country_nameSpain -6.648 4.23e-11 ***
## country_nameSudan -6.643 4.35e-11 ***
## country_nameSweden -6.655 4.03e-11 ***
## country_nameSwitzerland -6.671 3.64e-11 ***
## country_nameTajikistan -6.583 6.46e-11 ***
## country_nameThailand -6.403 2.07e-10 ***
## country_nameTogo -6.578 6.69e-11 ***
## country_nameTunisia -6.622 5.01e-11 ***
## country_nameTurkey -6.705 2.90e-11 ***
## country_nameUkraine -6.611 5.38e-11 ***
## country_nameUnited Kingdom -6.648 4.22e-11 ***
## country_nameUnited States of America -6.445 1.58e-10 ***
## country_nameUruguay -6.621 5.06e-11 ***
## country_nameVenezuela -6.666 3.75e-11 ***
## country_nameViet Nam -6.596 5.95e-11 ***
## country_nameZimbabwe -6.598 5.88e-11 ***
## year 6.585 6.38e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05388 on 1419 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.9566, Adjusted R-squared: 0.9541
## F-statistic: 377.2 on 83 and 1419 DF, p-value: < 2.2e-16
# summary(lm(energy ~ 0 + PKcal + Feed_Kt, comb_data_sel_cats_final))
summary(lm(energy ~ 0 + staples + nonstaples, comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + staples + nonstaples, data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65301 -0.01313 -0.00181 0.00369 0.56450
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## staples -0.48420 0.03101 -15.61 <2e-16 ***
## nonstaples 3.18511 0.04297 74.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0756 on 1559 degrees of freedom
## Multiple R-squared: 0.9063, Adjusted R-squared: 0.9062
## F-statistic: 7541 on 2 and 1559 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + staples + nonstaples + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_cats_final))
##
## Call:
## lm(formula = energy ~ 0 + staples + nonstaples + log(GDP_pcap_thous2015USD_FAO),
## data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65272 -0.01356 -0.00268 0.00259 0.56432
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## staples -0.4804410 0.0328896 -14.608 <2e-16 ***
## nonstaples 3.1771517 0.0478751 66.363 <2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.0004853 0.0008173 0.594 0.553
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.07699 on 1499 degrees of freedom
## (59 observations deleted due to missingness)
## Multiple R-squared: 0.9065, Adjusted R-squared: 0.9063
## F-statistic: 4843 on 3 and 1499 DF, p-value: < 2.2e-16
# summary(lm(energy ~ 0 + staples + nonstaples_other + nonstaples_animal, comb_data_sel_cats_final))
# summary(lm(energy ~ 0 + staples + nonstaples_other + nonstaples_animal + Feed_Kt, comb_data_sel_cats_final))
# summary(lm(EJ_per_PKcal ~ 0 + GDP_pcap_thous2015USD_FAO, comb_data_sel_cats_final))
# summary(lm(EJ_per_PKcal ~ 0 + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_cats_final))
# summary(lm(EJ_per_PKcal ~ 0 + nonstaples_frac, comb_data_sel_cats_final))
# summary(lm(EJ_per_PKcal ~ 0 + exp(nonstaples_frac), comb_data_sel_cats_final))
# summary(lm(EJ_per_PKcal ~ 0 + nonstaples_frac, comb_data_sel_cats_final, weights = comb_data_sel_cats_final$PKcal))
# summary(lm(EJ_per_PKcal ~ 0 + log(GDP_pcap_thous2015USD_FAO) + nonstaples_frac, comb_data_sel_cats_final))
# summary(lm(EJ_per_PKcal ~ 0 + log(GDP_pcap_thous2015USD_FAO) + nonstaples_frac, comb_data_sel_cats_final, weights = comb_data_sel_cats_final$PKcal))
# summary(lm(EJ_per_PKcal ~ 0 + log(GDP_pcap_thous2015USD_FAO) + nonstaples_frac + I(nonstaples_frac ^ 2), comb_data_sel_cats_final))
# summary(lm(EJ_per_PKcal ~ 0 + log(GDP_pcap_thous2015USD_FAO) + nonstaples_frac + I(nonstaples_frac ^ 2), comb_data_sel_cats_final, weights = comb_data_sel_cats_final$PKcal))
# summary(lm(EJ_per_PKcal ~ 0 + nonstaples_frac + I(nonstaples_frac ^ 2), comb_data_sel_cats_final))
# summary(lm(EJ_per_PKcal ~ 0 + nonstaples_frac + I(nonstaples_frac ^ 2), comb_data_sel_cats_final, weights = comb_data_sel_cats_final$PKcal))
# prep data with size for plotting
comb_data_sel_cats_final_to_plot <- comb_data_sel_cats_final %>%
mutate(size_to_plot = case_when(PKcal < 0.05 ~ 0.05,
PKcal > 0.75 ~ 0.75,
TRUE ~ PKcal))
# plot the linear model between food production and energy use
# save model to plot
lm_to_plot_pkcal <- lm(energy ~ 0 + PKcal, comb_data_sel_cats_final)
lm_to_plot_pkcal_summary <- summary(lm_to_plot_pkcal)
print(lm_to_plot_pkcal_summary)
##
## Call:
## lm(formula = energy ~ 0 + PKcal, data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74877 -0.00110 0.00436 0.01927 0.80324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.03354 0.01518 68.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1239 on 1560 degrees of freedom
## Multiple R-squared: 0.7483, Adjusted R-squared: 0.7482
## F-statistic: 4638 on 1 and 1560 DF, p-value: < 2.2e-16
pval_pkcal <- lm_to_plot_pkcal_summary$coef[1,4]
Rsq_pkcal <- lm_to_plot_pkcal_summary$adj.r.squared
x_for_lin_reg_plot_PKcal <- c(min(comb_data_sel_cats_final$PKcal), max(comb_data_sel_cats_final$PKcal))
plot_ly(comb_data_sel_cats_final_to_plot) %>%
add_trace(y = ~energy,
x = ~PKcal,
type = "scatter",
mode = "markers",
color = ~country_name) %>%
add_trace(x = seq(from = x_for_lin_reg_plot_PKcal[1], to = x_for_lin_reg_plot_PKcal[2], length.out = 10),
y = predict(lm_to_plot_pkcal, data.frame(PKcal = seq(from = x_for_lin_reg_plot_PKcal[1], to = x_for_lin_reg_plot_PKcal[2], length.out = 10))),
type = "scatter",
mode = "lines",
name = "Linear regression",
text = paste0("R squared: ", round(Rsq_pkcal, 3))) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption")
# save as ggplot
ggplot(comb_data_sel_cats_final_to_plot,
aes(x = PKcal, y = energy, color = country_name)) +
geom_point() +
guides(color = "none") +
geom_smooth(aes(group = 1), method = "lm", formula = y ~ 0 + x, se = FALSE) +
labs(x = "PKcal", y = "EJ", title = "Food processing energy use vs food consumption") +
theme_classic() +
theme(text = element_text(size=20))
ggsave(paste0(fig_dir, "/energy_PKcal_relationship_country_sel.png"), width = 12, height = 10)
# # plot the linear model between nonstaples fraction and the energy per calorie coefficient
# # save models to plot
# lm_to_plot_ns_frac <- lm(EJ_per_PKcal ~ 0 + nonstaples_frac, comb_data_sel_cats_final, weights = comb_data_sel_cats_final$PKcal)
# lm_to_plot_ns_frac_summary <- summary(lm_to_plot_ns_frac)
# print(lm_to_plot_ns_frac_summary)
# pval <- lm_to_plot_ns_frac_summary$coef[1,4]
# Rsq <- lm_to_plot_ns_frac_summary$adj.r.squared
#
# lm_to_plot_ns_frac_sq <- lm(EJ_per_PKcal ~ 0 + nonstaples_frac + I(nonstaples_frac ^ 2), comb_data_sel_cats_final, weights = comb_data_sel_cats_final$PKcal)
# lm_to_plot_ns_frac_sq_summary <- summary(lm_to_plot_ns_frac_sq)
# print(lm_to_plot_ns_frac_sq_summary)
# pval_sq <- lm_to_plot_ns_frac_sq_summary$coef[1,4]
# Rsq_sq <- lm_to_plot_ns_frac_sq_summary$adj.r.squared
#
# lm_to_plot_ns_frac_sq_unweight <- lm(EJ_per_PKcal ~ 0 + nonstaples_frac + I(nonstaples_frac ^ 2), comb_data_sel_cats_final)
# lm_to_plot_ns_frac_sq_unweight_summary <- summary(lm_to_plot_ns_frac_sq_unweight)
# print(lm_to_plot_ns_frac_sq_unweight_summary)
# pval_sq_unweight <- lm_to_plot_ns_frac_sq_unweight_summary$coef[1,4]
# Rsq_sq_unweight <- lm_to_plot_ns_frac_sq_unweight_summary$adj.r.squared
#
# x_for_lin_reg_plot <- c(min(comb_data_sel_cats_final$nonstaples_frac), max(comb_data_sel_cats_final$nonstaples_frac))
#
# plot_ly(comb_data_sel_cats_final_to_plot) %>%
# add_trace(y = ~EJ_per_PKcal,
# x = ~nonstaples_frac,
# type = "scatter",
# mode = "markers",
# color = ~country_name,
# text = ~paste0(year, "; PKcal = ", PKcal),
# size = ~size_to_plot,
# sizes = c(5, 150)) %>%
# #marker = ~list(size = size_to_plot*50)) %>%
# add_trace(x = seq(from = x_for_lin_reg_plot[1], to = x_for_lin_reg_plot[2], length.out = 10),
# y = predict(lm_to_plot_ns_frac, data.frame(nonstaples_frac = seq(from = x_for_lin_reg_plot[1], to = x_for_lin_reg_plot[2], length.out = 10))),
# type = "scatter",
# mode = "lines",
# name = "Linear regression",
# text = paste0("R squared: ", round(Rsq, 3))) %>%
# add_trace(x = seq(from = x_for_lin_reg_plot[1], to = x_for_lin_reg_plot[2], length.out = 10),
# y = predict(lm_to_plot_ns_frac_sq, data.frame(nonstaples_frac = seq(from = x_for_lin_reg_plot[1], to = x_for_lin_reg_plot[2], length.out = 10))),
# type = "scatter",
# mode = "lines",
# name = "Quadratic regression",
# text = paste0("R squared: ", round(Rsq_sq, 3))) %>%
# add_trace(x = seq(from = x_for_lin_reg_plot[1], to = x_for_lin_reg_plot[2], length.out = 10),
# y = predict(lm_to_plot_ns_frac_sq_unweight, data.frame(nonstaples_frac = seq(from = x_for_lin_reg_plot[1], to = x_for_lin_reg_plot[2], length.out = 10))),
# type = "scatter",
# mode = "lines",
# name = "Quadratic regression, unweighted",
# text = paste0("R squared: ", round(Rsq_sq_unweight, 3))) %>%
# layout(xaxis = list(title = "Nonstaples fraction"),
# yaxis = list(title = "EJ / PKcal"),
# title = "Food processing energy use coefficient vs nonstaples fraction")
#
# # save as ggplots
# ggplot(comb_data_sel_cats_final_to_plot,
# aes(x = nonstaples_frac, y = EJ_per_PKcal, color = country_name, size = PKcal)) +
# geom_point() +
# guides(color = "none", size = "none") +
# geom_smooth(aes(group = 1), method = "lm", formula = y ~ 0 + x + I(x ^ 2), se = FALSE) +
# labs(x = "Nonstaples fraction", y = "EJ / PKcal", title = "Energy coefficient vs fraction of calories consumed from nonstaples") +
# theme_classic() +
# theme(text = element_text(size=20))
# ggsave(paste0(fig_dir, "/coef_ns_relationship_sel.png"), width = 12, height = 10)
#
# ggplot(comb_data_sel_cats_final_to_plot,
# aes(x = nonstaples_frac, y = EJ_per_PKcal, color = country_name, size = PKcal)) +
# geom_point() +
# guides(color = "none", size = "none") +
# geom_smooth(aes(group = 1, weight = PKcal), method = "lm", formula = y ~ 0 + x + I(x ^ 2), se = FALSE) +
# labs(x = "Nonstaples fraction", y = "EJ / PKcal", title = "Energy coefficient vs fraction of calories consumed from nonstaples") +
# theme_classic() +
# theme(text = element_text(size=20))
# ggsave(paste0(fig_dir, "/coef_ns_relationship_sel_weighted.png"), width = 12, height = 10)
#
# ggplot(comb_data_sel_cats_final_to_plot,
# aes(x = nonstaples_frac, y = EJ_per_PKcal, color = country_name, size = PKcal)) +
# geom_point() +
# guides(color = "none", size = "none") +
# geom_smooth(aes(group = 1, weight = PKcal), method = "lm", formula = y ~ 0 + x , se = FALSE) +
# labs(x = "Nonstaples fraction", y = "EJ / PKcal", title = "Energy coefficient vs fraction of calories consumed from nonstaples") +
# theme_classic() +
# theme(text = element_text(size=20))
# ggsave(paste0(fig_dir, "/coef_ns_relationship_sel_weighted_linear.png"), width = 12, height = 10)
#
# ggplot(comb_data_sel_cats_final_to_plot,
# aes(x = nonstaples_frac, y = EJ_per_PKcal, color = country_name, size = PKcal)) +
# geom_point() +
# guides(color = "none", size = "none") +
# geom_smooth(aes(group = 1, weight = PKcal, color = "s"), method = "lm", formula = y ~ 0 + x , se = FALSE) +
# geom_smooth(aes(group = 1, weight = PKcal, color = "n"), method = "lm", formula = y ~ 0 + x + I(x ^ 2), se = FALSE) +
# labs(x = "Nonstaples fraction", y = "EJ / PKcal", title = "Energy coefficient vs fraction of calories consumed from nonstaples") +
# theme_classic() +
# theme(text = element_text(size=20))
# ggsave(paste0(fig_dir, "/coef_ns_relationship_sel_weighted_both.png"), width = 12, height = 10)
Using the same thresholds as the country level.
IEA_ind_sectors_GCAM_region_total_en_frac_wider <- IEA_ind_sectors_GCAM_region_total_en %>%
dplyr::select(-value) %>%
pivot_wider(names_from = FLOW, values_from = frac) %>%
left_join(GCAM_region_names)
# get regions and years to include
GCAM_reg_years_incl <- IEA_ind_sectors_GCAM_region_total_en_frac_wider %>%
# select only desired years in which INONSPEC fraction is not too high and food processing fraction is not too low, or in which food processing fraction is high even if INONSPEC fraction is high
filter(year >= min_year & ((INONSPEC < max_frac_inonspec & (!is.na(FOODPRO) & FOODPRO > min_frac_foodpro)) | FOODPRO > override_frac_foodpro)) %>%
dplyr::select(GCAM_region_ID, year) %>%
left_join(GCAM_region_names)
ggplot(IEA_ind_sectors_GCAM_region_fuel %>% filter(FLOW == "FOODPRO") %>% right_join(GCAM_reg_years_incl),
aes(x = year, y = value, fill = fuel)) +
geom_col() +
facet_wrap(~region, scale = "free_y") +
scale_fill_manual(values = pal_sel, drop = TRUE, limits = force) +
labs(x = "", y = "EJ", title = "Food processing energy use (IEA)") +
theme(axis.text.x = element_text(size=5)) +
scale_x_continuous(breaks = c(1990, 2000, 2010)) +
theme_classic()
ggsave(paste0(fig_dir, "/foodpro_energy_use_by_GCAM_region_fuel_hist_IEA_sel.png"), width = 35, height = 20)
# aggregate the food and energy data, dropping NA values
comb_data_all_cats_GCAM_reg <- comb_data_all_cats %>%
group_by(GCAM_region_ID, year) %>%
summarize(energy = sum(energy),
PKcal_w_na = sum(PKcal, na.rm = FALSE),
PKcal = sum(PKcal, na.rm = TRUE),
Food_Kt = sum(Food_Kt, na.rm = TRUE),
Food_Kt_w_calorie = sum(Food_Kt_w_calorie, na.rm = TRUE),
GDP_total_thous2015USD_FAO = sum(GDP_pcap_thous2015USD_FAO * pop, na.rm = TRUE),
pop = sum(pop, na.rm = TRUE),
Feed_Kt = sum(Feed_Kt, na.rm = TRUE),
nonstaples_animal = sum(nonstaples_animal, na.rm = TRUE),
nonstaples_other = sum(nonstaples_other, na.rm = TRUE),
staples = sum(staples, na.rm = TRUE)) %>%
mutate(GDP_pcap_thous2015USD_FAO = GDP_total_thous2015USD_FAO / pop,
nonstaples = nonstaples_animal + nonstaples_other,
staples_frac = staples / PKcal,
nonstaples_frac = (nonstaples_animal + nonstaples_other) / PKcal,
EJ_per_PKcal = energy / PKcal) %>%
ungroup() %>%
left_join(GCAM_region_names)
comb_data_sel_cats_GCAM_reg <- comb_data_all_cats_GCAM_reg %>%
group_by(region) %>%
filter(!all(energy == 0) & !all(is.na(PKcal) | PKcal == 0)) %>%
ungroup()
comb_data_sel_cats_GCAM_reg_final <- comb_data_sel_cats_GCAM_reg %>%
right_join(GCAM_reg_years_incl) %>%
filter(!is.na(PKcal),
PKcal > 0)
# print output of linear models
summary(lm(energy ~ 0 + PKcal, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + PKcal, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85107 -0.01511 0.01045 0.11280 0.77794
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.09445 0.02872 38.11 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2537 on 435 degrees of freedom
## Multiple R-squared: 0.7695, Adjusted R-squared: 0.769
## F-statistic: 1452 on 1 and 435 DF, p-value: < 2.2e-16
summary(lm(energy ~ PKcal, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ PKcal, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74835 -0.10406 -0.08288 0.03422 0.73279
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.10176 0.01393 7.305 1.34e-12 ***
## PKcal 0.95816 0.03293 29.097 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2397 on 434 degrees of freedom
## Multiple R-squared: 0.6611, Adjusted R-squared: 0.6603
## F-statistic: 846.6 on 1 and 434 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO),
## data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68736 -0.14943 -0.08501 0.04138 0.67067
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 0.948804 0.029177 32.52 <2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.051694 0.004672 11.06 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.23 on 407 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.8227, Adjusted R-squared: 0.8218
## F-statistic: 944.1 on 2 and 407 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) + region, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) +
## region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63261 -0.03080 0.00416 0.03117 0.30104
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.71512 0.17141 10.006 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.16356 0.03078 5.314 1.81e-07 ***
## regionArgentina -0.41024 0.13577 -3.022 0.00268 **
## regionAustralia_NZ -0.52298 0.11291 -4.632 4.96e-06 ***
## regionBrazil -0.01268 0.05099 -0.249 0.80379
## regionCanada -0.64028 0.11439 -5.597 4.12e-08 ***
## regionCentral Asia -0.36862 0.08754 -4.211 3.17e-05 ***
## regionChina -1.49419 0.23204 -6.439 3.55e-10 ***
## regionColombia -0.26636 0.04692 -5.677 2.69e-08 ***
## regionEU-12 -0.37178 0.05683 -6.542 1.93e-10 ***
## regionEU-15 -0.51932 0.08756 -5.931 6.68e-09 ***
## regionEurope_Eastern -0.18509 0.04045 -4.576 6.40e-06 ***
## regionEurope_Non_EU -0.45221 0.05202 -8.693 < 2e-16 ***
## regionEuropean Free Trade Association -0.69582 0.13108 -5.309 1.86e-07 ***
## regionJapan -0.58211 0.09474 -6.144 1.99e-09 ***
## regionMexico -0.47815 0.05814 -8.224 3.00e-15 ***
## regionRussia -0.24998 0.05069 -4.931 1.21e-06 ***
## regionSouth America_Northern -0.42749 0.07338 -5.826 1.19e-08 ***
## regionSouth Korea -0.50901 0.08534 -5.965 5.52e-09 ***
## regionSoutheast Asia -0.46815 0.05448 -8.592 < 2e-16 ***
## regionUSA -0.42928 0.09195 -4.669 4.18e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1113 on 388 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.9604, Adjusted R-squared: 0.9582
## F-statistic: 447.8 on 21 and 388 DF, p-value: < 2.2e-16
summary(lm(energy ~ PKcal + log(GDP_pcap_thous2015USD_FAO) + region, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ PKcal + log(GDP_pcap_thous2015USD_FAO) +
## region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63261 -0.03080 0.00416 0.03117 0.30104
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.41024 0.13577 -3.022 0.002682 **
## PKcal 1.71512 0.17141 10.006 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.16356 0.03078 5.314 1.81e-07 ***
## regionAustralia_NZ -0.11274 0.11826 -0.953 0.341057
## regionBrazil 0.39757 0.12241 3.248 0.001264 **
## regionCanada -0.23004 0.12054 -1.908 0.057084 .
## regionCentral Asia 0.04162 0.14256 0.292 0.770476
## regionChina -1.08395 0.30086 -3.603 0.000356 ***
## regionColombia 0.14388 0.11912 1.208 0.227851
## regionEU-12 0.03847 0.11747 0.327 0.743504
## regionEU-15 -0.10907 0.13582 -0.803 0.422425
## regionEurope_Eastern 0.22516 0.12813 1.757 0.079666 .
## regionEurope_Non_EU -0.04196 0.11855 -0.354 0.723566
## regionEuropean Free Trade Association -0.28558 0.12480 -2.288 0.022657 *
## regionJapan -0.17187 0.11476 -1.498 0.135035
## regionMexico -0.06790 0.11688 -0.581 0.561599
## regionRussia 0.16026 0.12109 1.323 0.186454
## regionSouth America_Northern -0.01725 0.11383 -0.152 0.879636
## regionSouth Korea -0.09876 0.11356 -0.870 0.385017
## regionSoutheast Asia -0.05790 0.15096 -0.384 0.701521
## regionUSA -0.01904 0.12552 -0.152 0.879531
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1113 on 388 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.9323, Adjusted R-squared: 0.9288
## F-statistic: 267 on 20 and 388 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + PKcal + region, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + PKcal + region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64795 -0.01905 0.00024 0.02269 0.35150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 2.312624 0.128753 17.962 < 2e-16 ***
## regionArgentina 0.001514 0.112003 0.014 0.989224
## regionAustralia_NZ 0.065496 0.022334 2.933 0.003547 **
## regionBrazil 0.183721 0.035537 5.170 3.65e-07 ***
## regionCanada -0.059897 0.034347 -1.744 0.081919 .
## regionCentral Asia -0.175384 0.080020 -2.192 0.028952 *
## regionChina -2.196125 0.190183 -11.547 < 2e-16 ***
## regionColombia -0.047468 0.022654 -2.095 0.036749 *
## regionEU-12 -0.109201 0.028496 -3.832 0.000147 ***
## regionEU-15 -0.286060 0.076445 -3.742 0.000208 ***
## regionEurope_Eastern -0.063382 0.033541 -1.890 0.059494 .
## regionEurope_Non_EU -0.215491 0.027231 -7.913 2.30e-14 ***
## regionEuropean Free Trade Association -0.009011 0.022052 -0.409 0.683038
## regionJapan -0.100616 0.028342 -3.550 0.000429 ***
## regionMexico -0.205020 0.027586 -7.432 6.16e-13 ***
## regionRussia -0.048545 0.032250 -1.505 0.133013
## regionSouth America_Northern -0.056373 0.022723 -2.481 0.013502 *
## regionSouth Korea -0.072224 0.023301 -3.100 0.002070 **
## regionSoutheast Asia -0.534924 0.053101 -10.074 < 2e-16 ***
## regionTaiwan -0.034926 0.022195 -1.574 0.116350
## regionUSA -0.071141 0.063386 -1.122 0.262365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1118 on 415 degrees of freedom
## Multiple R-squared: 0.9573, Adjusted R-squared: 0.9551
## F-statistic: 443 on 21 and 415 DF, p-value: < 2.2e-16
summary(lm(energy ~ PKcal + region, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ PKcal + region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64795 -0.01905 0.00024 0.02269 0.35150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.001514 0.112003 0.014 0.9892
## PKcal 2.312624 0.128753 17.962 < 2e-16 ***
## regionAustralia_NZ 0.063982 0.113960 0.561 0.5748
## regionBrazil 0.182207 0.115912 1.572 0.1167
## regionCanada -0.061410 0.116777 -0.526 0.5993
## regionCentral Asia -0.176897 0.137052 -1.291 0.1975
## regionChina -2.197639 0.214945 -10.224 < 2e-16 ***
## regionColombia -0.048981 0.113939 -0.430 0.6675
## regionEU-12 -0.110715 0.114519 -0.967 0.3342
## regionEU-15 -0.287574 0.131964 -2.179 0.0299 *
## regionEurope_Eastern -0.064896 0.116397 -0.558 0.5775
## regionEurope_Non_EU -0.217005 0.114330 -1.898 0.0584 .
## regionEuropean Free Trade Association -0.010524 0.114016 -0.092 0.9265
## regionJapan -0.102129 0.114495 -0.892 0.3729
## regionMexico -0.206533 0.114381 -1.806 0.0717 .
## regionRussia -0.050059 0.115246 -0.434 0.6642
## regionSouth America_Northern -0.057887 0.114049 -0.508 0.6120
## regionSouth Korea -0.073738 0.113942 -0.647 0.5179
## regionSoutheast Asia -0.536438 0.121366 -4.420 1.26e-05 ***
## regionTaiwan -0.036440 0.113980 -0.320 0.7494
## regionUSA -0.072654 0.125584 -0.579 0.5632
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1118 on 415 degrees of freedom
## Multiple R-squared: 0.9295, Adjusted R-squared: 0.9261
## F-statistic: 273.5 on 20 and 415 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) + region + year, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) +
## region + year, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.61580 -0.03662 0.00549 0.03991 0.32639
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.928201 0.187475 10.285 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.055057 0.050486 1.091 0.27615
## regionArgentina -7.251543 2.538537 -2.857 0.00451 **
## regionAustralia_NZ -7.206280 2.478948 -2.907 0.00386 **
## regionBrazil -6.922305 2.560780 -2.703 0.00717 **
## regionCanada -7.352138 2.489586 -2.953 0.00334 **
## regionCentral Asia -7.344978 2.586462 -2.840 0.00475 **
## regionChina -8.770635 2.706006 -3.241 0.00129 **
## regionColombia -7.192773 2.566920 -2.802 0.00533 **
## regionEU-12 -7.251406 2.549787 -2.844 0.00469 **
## regionEU-15 -7.340049 2.528833 -2.903 0.00391 **
## regionEurope_Eastern -7.195836 2.598058 -2.770 0.00588 **
## regionEurope_Non_EU -7.351914 2.557125 -2.875 0.00426 **
## regionEuropean Free Trade Association -7.316584 2.456686 -2.978 0.00308 **
## regionJapan -7.316857 2.497247 -2.930 0.00359 **
## regionMexico -7.352857 2.547995 -2.886 0.00412 **
## regionRussia -7.164485 2.562580 -2.796 0.00544 **
## regionSouth America_Northern -7.253788 2.530449 -2.867 0.00438 **
## regionSouth Korea -7.287756 2.513210 -2.900 0.00395 **
## regionSoutheast Asia -7.522180 2.614346 -2.877 0.00423 **
## regionUSA -7.186757 2.505562 -2.868 0.00435 **
## year 0.003535 0.001310 2.699 0.00726 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1104 on 387 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.9611, Adjusted R-squared: 0.9589
## F-statistic: 434.7 on 22 and 387 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) + region + year, comb_data_sel_cats_GCAM_reg_final %>% mutate(year = factor(year, levels = unique(comb_data_sel_cats_GCAM_reg_final$year)))))
##
## Call:
## lm(formula = energy ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) +
## region + year, data = comb_data_sel_cats_GCAM_reg_final %>%
## mutate(year = factor(year, levels = unique(comb_data_sel_cats_GCAM_reg_final$year))))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60474 -0.03378 0.00807 0.03720 0.29055
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 2.227448 0.207435 10.738 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) -0.006260 0.053891 -0.116 0.90759
## regionArgentina -0.067873 0.163828 -0.414 0.67890
## regionAustralia_NZ 0.050776 0.187745 0.270 0.78697
## regionBrazil 0.173903 0.077403 2.247 0.02526 *
## regionCanada -0.111226 0.179458 -0.620 0.53579
## regionCentral Asia -0.251572 0.096881 -2.597 0.00979 **
## regionChina -2.105264 0.270825 -7.774 7.95e-14 ***
## regionColombia -0.075114 0.072729 -1.033 0.30239
## regionEU-12 -0.124681 0.090681 -1.375 0.17000
## regionEU-15 -0.256482 0.123858 -2.071 0.03909 *
## regionEurope_Eastern -0.125518 0.050845 -2.469 0.01402 *
## regionEurope_Non_EU -0.233684 0.082078 -2.847 0.00466 **
## regionEuropean Free Trade Association -0.021556 0.218977 -0.098 0.92164
## regionJapan -0.107920 0.158668 -0.680 0.49684
## regionMexico -0.221323 0.093117 -2.377 0.01798 *
## regionRussia -0.060554 0.077013 -0.786 0.43221
## regionSouth America_Northern -0.076427 0.118854 -0.643 0.52061
## regionSouth Korea -0.089694 0.140334 -0.639 0.52313
## regionSoutheast Asia -0.536929 0.062512 -8.589 2.62e-16 ***
## regionUSA -0.048330 0.145760 -0.332 0.74040
## year1991 0.003424 0.042136 0.081 0.93527
## year1992 -0.001554 0.042159 -0.037 0.97061
## year1993 0.009966 0.041569 0.240 0.81066
## year1994 -0.004799 0.041609 -0.115 0.90825
## year1995 0.042853 0.041700 1.028 0.30479
## year1996 0.031094 0.041824 0.743 0.45770
## year1997 0.029372 0.042017 0.699 0.48496
## year1998 0.022760 0.042012 0.542 0.58832
## year1999 0.011829 0.042098 0.281 0.77887
## year2000 0.003052 0.042397 0.072 0.94265
## year2001 0.006904 0.042510 0.162 0.87108
## year2002 0.009390 0.042648 0.220 0.82586
## year2003 0.003156 0.042839 0.074 0.94131
## year2004 0.023725 0.042828 0.554 0.57995
## year2005 0.027075 0.042969 0.630 0.52902
## year2006 0.047460 0.044296 1.071 0.28468
## year2007 0.054660 0.045076 1.213 0.22606
## year2008 0.050589 0.045373 1.115 0.26560
## year2009 0.030780 0.044102 0.698 0.48566
## year2010 0.120702 0.046142 2.616 0.00927 **
## year2011 0.116460 0.046850 2.486 0.01337 *
## year2012 0.114404 0.047251 2.421 0.01596 *
## year2013 0.119638 0.048418 2.471 0.01393 *
## year2014 0.097201 0.047647 2.040 0.04207 *
## year2015 0.090727 0.047674 1.903 0.05783 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1115 on 363 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.9628, Adjusted R-squared: 0.9581
## F-statistic: 204.5 on 46 and 363 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + nonstaples, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + nonstaples, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58454 -0.03084 -0.00833 0.05096 0.61644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.57670 0.03693 69.78 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1513 on 435 degrees of freedom
## Multiple R-squared: 0.918, Adjusted R-squared: 0.9178
## F-statistic: 4869 on 1 and 435 DF, p-value: < 2.2e-16
summary(lm(energy ~ nonstaples, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ nonstaples, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.56417 -0.05481 -0.03459 0.03206 0.60096
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.029797 0.009128 3.264 0.00118 **
## nonstaples 2.482717 0.046507 53.383 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1497 on 434 degrees of freedom
## Multiple R-squared: 0.8678, Adjusted R-squared: 0.8675
## F-statistic: 2850 on 1 and 434 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO),
## data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53776 -0.06620 -0.03202 0.02696 0.59977
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.453793 0.045278 54.194 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.016059 0.003363 4.775 2.51e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1522 on 407 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.9223, Adjusted R-squared: 0.922
## F-statistic: 2417 on 2 and 407 DF, p-value: < 2.2e-16
summary(lm(energy ~ nonstaples + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ nonstaples + log(GDP_pcap_thous2015USD_FAO),
## data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.54891 -0.06975 -0.02574 0.03099 0.60584
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.028953 0.020552 -1.409 0.159679
## nonstaples 2.476326 0.047969 51.624 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.024889 0.007111 3.500 0.000517 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.152 on 406 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.8679, Adjusted R-squared: 0.8672
## F-statistic: 1334 on 2 and 406 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO) + region, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO) +
## region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64529 -0.02304 0.00092 0.02460 0.31488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.200651 0.175370 12.549 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.083484 0.031438 2.656 0.008244 **
## regionArgentina -0.175191 0.132949 -1.318 0.188372
## regionAustralia_NZ -0.221773 0.115739 -1.916 0.056082 .
## regionBrazil 0.238036 0.053226 4.472 1.02e-05 ***
## regionCanada -0.332028 0.117373 -2.829 0.004914 **
## regionCentral Asia -0.186897 0.085750 -2.180 0.029892 *
## regionChina -0.065979 0.076003 -0.868 0.385869
## regionColombia -0.127932 0.048573 -2.634 0.008781 **
## regionEU-12 -0.146127 0.060553 -2.413 0.016275 *
## regionEU-15 -0.136517 0.079709 -1.713 0.087568 .
## regionEurope_Eastern -0.067632 0.040721 -1.661 0.097544 .
## regionEurope_Non_EU -0.213839 0.056843 -3.762 0.000195 ***
## regionEuropean Free Trade Association -0.352192 0.134034 -2.628 0.008939 **
## regionJapan -0.221951 0.102108 -2.174 0.030333 *
## regionMexico -0.232315 0.062932 -3.692 0.000255 ***
## regionRussia 0.004944 0.054617 0.091 0.927920
## regionSouth America_Northern -0.221960 0.075707 -2.932 0.003569 **
## regionSouth Korea -0.236169 0.089791 -2.630 0.008873 **
## regionSoutheast Asia -0.040181 0.030135 -1.333 0.183192
## regionUSA -0.062089 0.091760 -0.677 0.499034
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1053 on 388 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.9645, Adjusted R-squared: 0.9626
## F-statistic: 502.6 on 21 and 388 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + nonstaples + region, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + nonstaples + region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65396 -0.01592 -0.00002 0.01969 0.33582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.5314864 0.1189007 21.291 < 2e-16
## regionArgentina 0.0401654 0.1031191 0.390 0.697102
## regionAustralia_NZ 0.0806606 0.0204120 3.952 9.12e-05
## regionBrazil 0.3621971 0.0252941 14.319 < 2e-16
## regionCanada -0.0320123 0.0313586 -1.021 0.307922
## regionCentral Asia -0.0748663 0.0730905 -1.024 0.306292
## regionChina -0.1532522 0.0665663 -2.302 0.021815
## regionColombia -0.0113681 0.0204473 -0.556 0.578530
## regionEU-12 0.0029355 0.0225874 0.130 0.896659
## regionEU-15 0.0247097 0.0513317 0.481 0.630504
## regionEurope_Eastern 0.0034103 0.0300955 0.113 0.909836
## regionEurope_Non_EU -0.0742120 0.0213833 -3.471 0.000574
## regionEuropean Free Trade Association -0.0004722 0.0202714 -0.023 0.981427
## regionJapan 0.0429533 0.0218885 1.962 0.050387
## regionMexico -0.0756466 0.0217520 -3.478 0.000559
## regionRussia 0.1282448 0.0237003 5.411 1.06e-07
## regionSouth America_Northern -0.0288557 0.0207183 -1.393 0.164437
## regionSouth Korea -0.0041704 0.0205028 -0.203 0.838916
## regionSoutheast Asia -0.0038444 0.0263813 -0.146 0.884209
## regionTaiwan -0.0160890 0.0203103 -0.792 0.428722
## regionUSA 0.1510049 0.0445830 3.387 0.000774
##
## nonstaples ***
## regionArgentina
## regionAustralia_NZ ***
## regionBrazil ***
## regionCanada
## regionCentral Asia
## regionChina *
## regionColombia
## regionEU-12
## regionEU-15
## regionEurope_Eastern
## regionEurope_Non_EU ***
## regionEuropean Free Trade Association
## regionJapan .
## regionMexico ***
## regionRussia ***
## regionSouth America_Northern
## regionSouth Korea
## regionSoutheast Asia
## regionTaiwan
## regionUSA ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.103 on 415 degrees of freedom
## Multiple R-squared: 0.9637, Adjusted R-squared: 0.9619
## F-statistic: 525 on 21 and 415 DF, p-value: < 2.2e-16
summary(lm(energy ~ nonstaples + region, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ nonstaples + region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65396 -0.01592 -0.00002 0.01969 0.33582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.040165 0.103119 0.390 0.69710
## nonstaples 2.531486 0.118901 21.291 < 2e-16 ***
## regionAustralia_NZ 0.040495 0.105016 0.386 0.69998
## regionBrazil 0.322032 0.105631 3.049 0.00245 **
## regionCanada -0.072178 0.107632 -0.671 0.50285
## regionCentral Asia -0.115032 0.126224 -0.911 0.36265
## regionChina -0.193418 0.120761 -1.602 0.10999
## regionColombia -0.051534 0.105015 -0.491 0.62388
## regionEU-12 -0.037230 0.105201 -0.354 0.72360
## regionEU-15 -0.015456 0.113624 -0.136 0.89187
## regionEurope_Eastern -0.036755 0.107260 -0.343 0.73202
## regionEurope_Non_EU -0.114377 0.105061 -1.089 0.27693
## regionEuropean Free Trade Association -0.040638 0.105036 -0.387 0.69903
## regionJapan 0.002788 0.105113 0.027 0.97885
## regionMexico -0.115812 0.105098 -1.102 0.27113
## regionRussia 0.088079 0.105415 0.836 0.40389
## regionSouth America_Northern -0.069021 0.105103 -0.657 0.51174
## regionSouth Korea -0.044336 0.105013 -0.422 0.67310
## regionSoutheast Asia -0.044010 0.105893 -0.416 0.67791
## regionTaiwan -0.056254 0.105027 -0.536 0.59251
## regionUSA 0.110839 0.110993 0.999 0.31856
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.103 on 415 degrees of freedom
## Multiple R-squared: 0.9401, Adjusted R-squared: 0.9372
## F-statistic: 325.7 on 20 and 415 DF, p-value: < 2.2e-16
summary(lm(energy ~ nonstaples + log(GDP_pcap_thous2015USD_FAO) + region, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ nonstaples + log(GDP_pcap_thous2015USD_FAO) +
## region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64529 -0.02304 0.00092 0.02460 0.31488
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.17519 0.13295 -1.318 0.188372
## nonstaples 2.20065 0.17537 12.549 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.08348 0.03144 2.656 0.008244 **
## regionAustralia_NZ -0.04658 0.11224 -0.415 0.678346
## regionBrazil 0.41323 0.11335 3.646 0.000303 ***
## regionCanada -0.15684 0.11453 -1.369 0.171678
## regionCentral Asia -0.01171 0.13476 -0.087 0.930825
## regionChina 0.10921 0.16862 0.648 0.517567
## regionColombia 0.04726 0.11359 0.416 0.677615
## regionEU-12 0.02906 0.11040 0.263 0.792498
## regionEU-15 0.03867 0.11803 0.328 0.743340
## regionEurope_Eastern 0.10756 0.12236 0.879 0.379938
## regionEurope_Non_EU -0.03865 0.11111 -0.348 0.728164
## regionEuropean Free Trade Association -0.17700 0.11902 -1.487 0.137780
## regionJapan -0.04676 0.10903 -0.429 0.668254
## regionMexico -0.05712 0.10968 -0.521 0.602790
## regionRussia 0.18014 0.11255 1.600 0.110311
## regionSouth America_Northern -0.04677 0.10775 -0.434 0.664490
## regionSouth Korea -0.06098 0.10751 -0.567 0.570932
## regionSoutheast Asia 0.13501 0.12760 1.058 0.290687
## regionUSA 0.11310 0.11346 0.997 0.319446
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1053 on 388 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.9394, Adjusted R-squared: 0.9363
## F-statistic: 300.6 on 20 and 388 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO) + region + year, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + nonstaples + log(GDP_pcap_thous2015USD_FAO) +
## region + year, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.61594 -0.02625 0.00106 0.03239 0.31054
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.736257 0.196523 13.923 < 2e-16
## log(GDP_pcap_thous2015USD_FAO) -0.146194 0.052421 -2.789 0.00555
## regionArgentina -13.322715 2.449387 -5.439 9.51e-08
## regionAustralia_NZ -13.047349 2.388740 -5.462 8.44e-08
## regionBrazil -13.039109 2.470669 -5.278 2.19e-07
## regionCanada -13.211115 2.398758 -5.507 6.65e-08
## regionCentral Asia -13.609232 2.498519 -5.447 9.13e-08
## regionChina -13.772925 2.551153 -5.399 1.17e-07
## regionColombia -13.464103 2.481559 -5.426 1.02e-07
## regionEU-12 -13.370903 2.461086 -5.433 9.82e-08
## regionEU-15 -13.210957 2.433640 -5.428 1.01e-07
## regionEurope_Eastern -13.572235 2.512759 -5.401 1.16e-07
## regionEurope_Non_EU -13.474342 2.467649 -5.460 8.51e-08
## regionEuropean Free Trade Association -13.046226 2.365195 -5.516 6.36e-08
## regionJapan -13.133323 2.404108 -5.463 8.40e-08
## regionMexico -13.442602 2.458447 -5.468 8.18e-08
## regionRussia -13.280390 2.472221 -5.372 1.35e-07
## regionSouth America_Northern -13.347815 2.443082 -5.464 8.37e-08
## regionSouth Korea -13.253671 2.423381 -5.469 8.13e-08
## regionSoutheast Asia -13.554640 2.514453 -5.391 1.22e-07
## regionUSA -13.017293 2.411868 -5.397 1.18e-07
## year 0.006825 0.001270 5.375 1.33e-07
##
## nonstaples ***
## log(GDP_pcap_thous2015USD_FAO) **
## regionArgentina ***
## regionAustralia_NZ ***
## regionBrazil ***
## regionCanada ***
## regionCentral Asia ***
## regionChina ***
## regionColombia ***
## regionEU-12 ***
## regionEU-15 ***
## regionEurope_Eastern ***
## regionEurope_Non_EU ***
## regionEuropean Free Trade Association ***
## regionJapan ***
## regionMexico ***
## regionRussia ***
## regionSouth America_Northern ***
## regionSouth Korea ***
## regionSoutheast Asia ***
## regionUSA ***
## year ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1017 on 387 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.967, Adjusted R-squared: 0.9651
## F-statistic: 515.5 on 22 and 387 DF, p-value: < 2.2e-16
# summary(lm(energy ~ 0 + PKcal + Feed_Kt, comb_data_sel_cats_GCAM_reg_final))
summary(lm(energy ~ 0 + staples + nonstaples, comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + staples + nonstaples, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60034 -0.03229 -0.00884 0.05340 0.58710
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## staples -0.39135 0.05075 -7.711 8.6e-14 ***
## nonstaples 2.98542 0.06334 47.137 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1421 on 434 degrees of freedom
## Multiple R-squared: 0.9279, Adjusted R-squared: 0.9275
## F-statistic: 2791 on 2 and 434 DF, p-value: < 2.2e-16
summary(lm(energy ~ 0 + staples + nonstaples + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_cats_GCAM_reg_final))
##
## Call:
## lm(formula = energy ~ 0 + staples + nonstaples + log(GDP_pcap_thous2015USD_FAO),
## data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.60119 -0.05044 -0.02420 0.03912 0.58169
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## staples -0.343209 0.055553 -6.178 1.58e-09 ***
## nonstaples 2.869546 0.080045 35.849 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.008628 0.003436 2.511 0.0124 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1457 on 406 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.929, Adjusted R-squared: 0.9285
## F-statistic: 1771 on 3 and 406 DF, p-value: < 2.2e-16
# plot the linear model between food production and energy use
# save model to plot
lm_to_plot_pkcal_GCAM <- lm(energy ~ 0 + PKcal, comb_data_sel_cats_final)
lm_to_plot_pkcal_GCAM_summary <- summary(lm_to_plot_pkcal_GCAM)
print(lm_to_plot_pkcal_GCAM_summary)
##
## Call:
## lm(formula = energy ~ 0 + PKcal, data = comb_data_sel_cats_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.74877 -0.00110 0.00436 0.01927 0.80324
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.03354 0.01518 68.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1239 on 1560 degrees of freedom
## Multiple R-squared: 0.7483, Adjusted R-squared: 0.7482
## F-statistic: 4638 on 1 and 1560 DF, p-value: < 2.2e-16
pval_pkcal_GCAM <- lm_to_plot_pkcal_GCAM_summary$coef[1,4]
Rsq_pkcal_GCAM <- lm_to_plot_pkcal_GCAM_summary$adj.r.squared
coef_pkcal_GCAM <- lm_to_plot_pkcal_GCAM_summary$coefficients[1]
x_for_lin_reg_plot_PKcal_GCAM <- c(min(comb_data_sel_cats_GCAM_reg_final$PKcal), max(comb_data_sel_cats_GCAM_reg_final$PKcal))
plot_ly(comb_data_sel_cats_GCAM_reg_final) %>%
add_trace(y = ~energy,
x = ~PKcal,
type = "scatter",
mode = "markers",
color = ~region) %>%
add_trace(x = seq(from = x_for_lin_reg_plot_PKcal_GCAM[1], to = x_for_lin_reg_plot_PKcal_GCAM[2], length.out = 10),
y = predict(lm_to_plot_pkcal_GCAM, data.frame(PKcal = seq(from = x_for_lin_reg_plot_PKcal_GCAM[1], to = x_for_lin_reg_plot_PKcal_GCAM[2], length.out = 10))),
type = "scatter",
mode = "lines",
name = "Linear regression",
text = paste0("R squared: ", round(Rsq_pkcal, 3))) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption")
# save as ggplot
ggplot(comb_data_sel_cats_GCAM_reg_final,
aes(x = PKcal, y = energy, color = region)) +
geom_point() +
scale_color_manual(values = palette36) +
geom_smooth(aes(group = 1), method = "lm", formula = y ~ 0 + x, se = FALSE) +
labs(x = "PKcal", y = "EJ", title = "Food processing energy use vs food consumption") +
theme_classic() +
theme(text = element_text(size=20))
ggsave(paste0(fig_dir, "/energy_PKcal_relationship_GCAM_reg_sel.png"), width = 12, height = 10)
# also plot the version with the regional fixed effects
lm_to_plot_pkcal_GCAM_reg <- lm(energy ~ 0 + PKcal + region, comb_data_sel_cats_GCAM_reg_final)
lm_to_plot_pkcal_GCAM_reg_summary <- summary(lm_to_plot_pkcal_GCAM_reg)
print(lm_to_plot_pkcal_GCAM_reg_summary)
##
## Call:
## lm(formula = energy ~ 0 + PKcal + region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64795 -0.01905 0.00024 0.02269 0.35150
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 2.312624 0.128753 17.962 < 2e-16 ***
## regionArgentina 0.001514 0.112003 0.014 0.989224
## regionAustralia_NZ 0.065496 0.022334 2.933 0.003547 **
## regionBrazil 0.183721 0.035537 5.170 3.65e-07 ***
## regionCanada -0.059897 0.034347 -1.744 0.081919 .
## regionCentral Asia -0.175384 0.080020 -2.192 0.028952 *
## regionChina -2.196125 0.190183 -11.547 < 2e-16 ***
## regionColombia -0.047468 0.022654 -2.095 0.036749 *
## regionEU-12 -0.109201 0.028496 -3.832 0.000147 ***
## regionEU-15 -0.286060 0.076445 -3.742 0.000208 ***
## regionEurope_Eastern -0.063382 0.033541 -1.890 0.059494 .
## regionEurope_Non_EU -0.215491 0.027231 -7.913 2.30e-14 ***
## regionEuropean Free Trade Association -0.009011 0.022052 -0.409 0.683038
## regionJapan -0.100616 0.028342 -3.550 0.000429 ***
## regionMexico -0.205020 0.027586 -7.432 6.16e-13 ***
## regionRussia -0.048545 0.032250 -1.505 0.133013
## regionSouth America_Northern -0.056373 0.022723 -2.481 0.013502 *
## regionSouth Korea -0.072224 0.023301 -3.100 0.002070 **
## regionSoutheast Asia -0.534924 0.053101 -10.074 < 2e-16 ***
## regionTaiwan -0.034926 0.022195 -1.574 0.116350
## regionUSA -0.071141 0.063386 -1.122 0.262365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1118 on 415 degrees of freedom
## Multiple R-squared: 0.9573, Adjusted R-squared: 0.9551
## F-statistic: 443 on 21 and 415 DF, p-value: < 2.2e-16
# get coefficients for each region
coefs_pkcal_GCAM_reg <- data.frame(coef = unname(lm_to_plot_pkcal_GCAM_reg$coefficients),
pval = unname(summary(lm_to_plot_pkcal_GCAM_reg)$coef[,4]),
label = names(lm_to_plot_pkcal_GCAM_reg$coefficients)) %>%
mutate(region = ifelse(grepl("region", label), substr(label, 7, nchar(label)), NA))
overall_slope <- (coefs_pkcal_GCAM_reg %>% filter(is.na(region)))$coef[1]
coefs_pkcal_GCAM_reg_sel <- coefs_pkcal_GCAM_reg %>%
filter(!is.na(region)) %>%
dplyr::select(-label) %>%
rename(intercept = coef) %>%
# include a variable indicating the intercept only if the intercept is significant
mutate(intercept_if_sig = ifelse(pval <= max_pval, intercept, 0))
# same for the one with GDP too
lm_to_plot_pkcal_GCAM_reg_GDP <- lm(energy ~ 0 + PKcal + region + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_cats_GCAM_reg_final)
lm_to_plot_pkcal_GCAM_reg_GDP_summary <- summary(lm_to_plot_pkcal_GCAM_reg_GDP)
print(lm_to_plot_pkcal_GCAM_reg_GDP_summary)
##
## Call:
## lm(formula = energy ~ 0 + PKcal + region + log(GDP_pcap_thous2015USD_FAO),
## data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63261 -0.03080 0.00416 0.03117 0.30104
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.71512 0.17141 10.006 < 2e-16 ***
## regionArgentina -0.41024 0.13577 -3.022 0.00268 **
## regionAustralia_NZ -0.52298 0.11291 -4.632 4.96e-06 ***
## regionBrazil -0.01268 0.05099 -0.249 0.80379
## regionCanada -0.64028 0.11439 -5.597 4.12e-08 ***
## regionCentral Asia -0.36862 0.08754 -4.211 3.17e-05 ***
## regionChina -1.49419 0.23204 -6.439 3.55e-10 ***
## regionColombia -0.26636 0.04692 -5.677 2.69e-08 ***
## regionEU-12 -0.37178 0.05683 -6.542 1.93e-10 ***
## regionEU-15 -0.51932 0.08756 -5.931 6.68e-09 ***
## regionEurope_Eastern -0.18509 0.04045 -4.576 6.40e-06 ***
## regionEurope_Non_EU -0.45221 0.05202 -8.693 < 2e-16 ***
## regionEuropean Free Trade Association -0.69582 0.13108 -5.309 1.86e-07 ***
## regionJapan -0.58211 0.09474 -6.144 1.99e-09 ***
## regionMexico -0.47815 0.05814 -8.224 3.00e-15 ***
## regionRussia -0.24998 0.05069 -4.931 1.21e-06 ***
## regionSouth America_Northern -0.42749 0.07338 -5.826 1.19e-08 ***
## regionSouth Korea -0.50901 0.08534 -5.965 5.52e-09 ***
## regionSoutheast Asia -0.46815 0.05448 -8.592 < 2e-16 ***
## regionUSA -0.42928 0.09195 -4.669 4.18e-06 ***
## log(GDP_pcap_thous2015USD_FAO) 0.16356 0.03078 5.314 1.81e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1113 on 388 degrees of freedom
## (27 observations deleted due to missingness)
## Multiple R-squared: 0.9604, Adjusted R-squared: 0.9582
## F-statistic: 447.8 on 21 and 388 DF, p-value: < 2.2e-16
# get coefficients for each region
coefs_pkcal_GCAM_reg_GDP <- data.frame(coef = unname(lm_to_plot_pkcal_GCAM_reg_GDP$coefficients),
pval = unname(summary(lm_to_plot_pkcal_GCAM_reg_GDP)$coef[,4]),
label = names(lm_to_plot_pkcal_GCAM_reg_GDP$coefficients)) %>%
mutate(region = ifelse(grepl("region", label), substr(label, 7, nchar(label)), NA))
overall_slope_GDP_PKcal <- (coefs_pkcal_GCAM_reg_GDP %>% filter(label == "PKcal"))$coef[1]
overall_slope_GDP_GDP <- (coefs_pkcal_GCAM_reg_GDP %>% filter(label == "log(GDP_pcap_thous2015USD_FAO)"))$coef[1]
coefs_pkcal_GCAM_reg_GDP_sel <- coefs_pkcal_GCAM_reg_GDP %>%
filter(!is.na(region)) %>%
dplyr::select(-label) %>%
rename(intercept_w_GDP = coef,
pval_w_GDP = pval) %>%
# include a variable indicating the intercept only if the intercept is significant
mutate(intercept_w_GDP_if_sig = ifelse(pval_w_GDP <= max_pval, intercept_w_GDP, 0))
# add to the data
comb_data_sel_cats_GCAM_reg_final_2 <- comb_data_sel_cats_GCAM_reg_final %>%
left_join(coefs_pkcal_GCAM_reg_sel) %>%
left_join(coefs_pkcal_GCAM_reg_GDP_sel)
# plot
ggplot(comb_data_sel_cats_GCAM_reg_final_2,
aes(x = PKcal, y = energy, color = region)) +
geom_point() +
scale_color_manual(values = palette36) +
geom_abline(aes(slope = overall_slope, intercept = intercept, color = region)) +
labs(x = "PKcal", y = "EJ", title = "Food processing energy use vs food consumption") +
theme_classic() +
theme(text = element_text(size=20))
ggsave(paste0(fig_dir, "/energy_PKcal_relationship_GCAM_reg_sel_w_reg_fixed_effects.png"), width = 12, height = 10)
plot_ly(comb_data_sel_cats_GCAM_reg_final_2) %>%
add_trace(y = ~energy,
x = ~PKcal,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region) %>%
add_trace(x = ~PKcal,
y = ~PKcal*overall_slope + intercept,
color = ~region,
type = "scatter",
mode = "lines",
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(x = ~PKcal,
y = ~PKcal*overall_slope + intercept_if_sig,
color = ~region,
type = "scatter",
mode = "lines",
line = list(dash = "dash"),
text = "intercept if sig",
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(x = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_2$PKcal), length.out = 100),
y = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_2$PKcal), length.out = 100)*overall_slope,
type = "scatter",
mode = "lines",
color = I("darkgray"),
line = list(dash = "dash"),
name = "With regional fixed effects",
text = paste0("Slope: ", round(overall_slope, 2))) %>%
add_trace(x = seq(from = x_for_lin_reg_plot_PKcal_GCAM[1], to = x_for_lin_reg_plot_PKcal_GCAM[2], length.out = 10),
y = predict(lm_to_plot_pkcal_GCAM, data.frame(PKcal = seq(from = x_for_lin_reg_plot_PKcal_GCAM[1], to = x_for_lin_reg_plot_PKcal_GCAM[2], length.out = 10))),
type = "scatter",
mode = "lines",
color = I("black"),
line = list(dash = "dash"),
name = "Base linear regression",
text = paste0("Slope: ", round(coef_pkcal_GCAM, 2))) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption")
# plot just the intercept values by region and as they vary with 2015 calories
plot_ly(comb_data_sel_cats_GCAM_reg_final_2 %>% filter(year == 2015)) %>%
add_trace(x = ~region,
y = ~intercept,
type = "bar") %>%
layout(xaxis = list(title = ""),
yaxis = list(title = "Intercept (PKcal)"),
title = "Intercepts of food processing energy use vs food consumption relationship")
plot_ly(comb_data_sel_cats_GCAM_reg_final_2 %>% filter(year == 2015)) %>%
add_trace(x = ~PKcal,
y = ~intercept,
type = "scatter",
mode = "markers",
color = ~region) %>%
add_trace(x = ~PKcal,
y = ~intercept_if_sig,
type = "scatter",
mode = "markers",
marker = list(symbol = "square", size = 3),
color = I("black"),
name = "if sig") %>%
layout(xaxis = list(title = "2015 PKcal"),
yaxis = list(title = "Intercept (PKcal)"),
title = "Intercepts of food processing energy use vs food consumption relationship")
# look at linear model between the intercept and the total PKcal in 2015
summary(lm(intercept ~ 0 + PKcal + GDP_pcap_thous2015USD_FAO + nonstaples_frac, comb_data_sel_cats_GCAM_reg_final_2 %>% filter(year == 2015)))
##
## Call:
## lm(formula = intercept ~ 0 + PKcal + GDP_pcap_thous2015USD_FAO +
## nonstaples_frac, data = comb_data_sel_cats_GCAM_reg_final_2 %>%
## filter(year == 2015))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17391 -0.09687 -0.04886 -0.01825 0.41585
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal -1.232679 0.103740 -11.882 2.37e-09 ***
## GDP_pcap_thous2015USD_FAO 0.001621 0.002100 0.772 0.451
## nonstaples_frac 0.079743 0.118661 0.672 0.511
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1715 on 16 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.9127, Adjusted R-squared: 0.8964
## F-statistic: 55.79 on 3 and 16 DF, p-value: 1.077e-08
summary(lm(intercept ~ 0 + PKcal, comb_data_sel_cats_GCAM_reg_final_2 %>% filter(year == 2015)))
##
## Call:
## lm(formula = intercept ~ 0 + PKcal, data = comb_data_sel_cats_GCAM_reg_final_2 %>%
## filter(year == 2015))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25888 -0.02698 0.01104 0.07173 0.46067
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal -1.14415 0.09343 -12.25 1.84e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1787 on 19 degrees of freedom
## Multiple R-squared: 0.8876, Adjusted R-squared: 0.8816
## F-statistic: 150 on 1 and 19 DF, p-value: 1.837e-10
intercept_PKcal_2015_slope <- lm(intercept ~ 0 + PKcal, comb_data_sel_cats_GCAM_reg_final_2 %>% filter(year == 2015))$coefficients[1]
# try with just intercepts that are significant
summary(lm(intercept ~ 0 + PKcal, comb_data_sel_cats_GCAM_reg_final_2 %>% filter(year == 2015 & pval <= max_pval)))
##
## Call:
## lm(formula = intercept ~ 0 + PKcal, data = comb_data_sel_cats_GCAM_reg_final_2 %>%
## filter(year == 2015 & pval <= max_pval))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.15142 -0.05160 0.00283 0.04889 0.47603
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal -1.20761 0.09638 -12.53 2.98e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1784 on 12 degrees of freedom
## Multiple R-squared: 0.929, Adjusted R-squared: 0.9231
## F-statistic: 157 on 1 and 12 DF, p-value: 2.983e-08
# try linear model between intercept and GDP for the model that includes GDP as well as fixed effects
summary(lm(intercept_w_GDP ~ 0 + PKcal + GDP_pcap_thous2015USD_FAO + nonstaples_frac, comb_data_sel_cats_GCAM_reg_final_2 %>% filter(year == 2015)))
##
## Call:
## lm(formula = intercept_w_GDP ~ 0 + PKcal + GDP_pcap_thous2015USD_FAO +
## nonstaples_frac, data = comb_data_sel_cats_GCAM_reg_final_2 %>%
## filter(year == 2015))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.17778 -0.12993 -0.08066 0.05278 0.43022
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal -0.655022 0.112138 -5.841 2.5e-05 ***
## GDP_pcap_thous2015USD_FAO -0.003949 0.002270 -1.740 0.10103
## nonstaples_frac -0.382904 0.128267 -2.985 0.00874 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1854 on 16 degrees of freedom
## (1 observation deleted due to missingness)
## Multiple R-squared: 0.9067, Adjusted R-squared: 0.8892
## F-statistic: 51.84 on 3 and 16 DF, p-value: 1.832e-08
intercept_w_GDP_PKcal_2015_slope <- lm(intercept_w_GDP ~ 0 + PKcal + GDP_pcap_thous2015USD_FAO, comb_data_sel_cats_GCAM_reg_final_2 %>% filter(year == 2015))$coefficients[1]
intercept_w_GDP_nonstaples_frac_2015_slope <- lm(intercept_w_GDP ~ 0 + PKcal + nonstaples_frac, comb_data_sel_cats_GCAM_reg_final_2 %>% filter(year == 2015))$coefficients[2]
# plot the model with squared term
# save model to plot
lm_to_plot_pkcal_sqr_GCAM <- lm(energy ~ 0 + PKcal + I(PKcal^2), comb_data_sel_cats_GCAM_reg_final)
lm_to_plot_pkcal_sqr_GCAM_summary <- summary(lm_to_plot_pkcal_sqr_GCAM)
print(lm_to_plot_pkcal_sqr_GCAM_summary)
##
## Call:
## lm(formula = energy ~ 0 + PKcal + I(PKcal^2), data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.57295 -0.07445 -0.02832 0.04683 0.57940
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.99282 0.05959 33.44 <2e-16 ***
## I(PKcal^2) -0.74510 0.04572 -16.30 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2001 on 434 degrees of freedom
## Multiple R-squared: 0.857, Adjusted R-squared: 0.8563
## F-statistic: 1301 on 2 and 434 DF, p-value: < 2.2e-16
pval_pkcal_sqr_GCAM <- lm_to_plot_pkcal_sqr_GCAM_summary$coef[1,4]
Rsq_pkcal_sqr_GCAM <- lm_to_plot_pkcal_sqr_GCAM_summary$adj.r.squared
coef_pkcal_sqr_GCAM <- lm_to_plot_pkcal_sqr_GCAM_summary$coefficients[1]
coef_pkcal_sqr_sqr_GCAM <- lm_to_plot_pkcal_sqr_GCAM_summary$coefficients[2]
plot_ly(comb_data_sel_cats_GCAM_reg_final) %>%
add_trace(y = ~energy,
x = ~PKcal,
type = "scatter",
mode = "markers",
color = ~region) %>%
add_trace(x = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_2$PKcal), length.out = 100),
y = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_2$PKcal), length.out = 100)*coef_pkcal_sqr_GCAM + seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_2$PKcal), length.out = 100)^2 * coef_pkcal_sqr_sqr_GCAM,
type = "scatter",
mode = "lines",
text = paste0("Slope: ", round(coef_pkcal_sqr_GCAM, 2)),
name = "Quadratic regression") %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs calorie consumption")
# plot the models with just nonstaples calories
# save model to plot
lm_to_plot_nonstaples_GCAM <- lm(energy ~ 0 + nonstaples, comb_data_sel_cats_GCAM_reg_final)
lm_to_plot_nonstaples_GCAM_summary <- summary(lm_to_plot_nonstaples_GCAM)
print(lm_to_plot_nonstaples_GCAM_summary)
##
## Call:
## lm(formula = energy ~ 0 + nonstaples, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58454 -0.03084 -0.00833 0.05096 0.61644
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.57670 0.03693 69.78 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1513 on 435 degrees of freedom
## Multiple R-squared: 0.918, Adjusted R-squared: 0.9178
## F-statistic: 4869 on 1 and 435 DF, p-value: < 2.2e-16
pval_nonstaples_GCAM <- lm_to_plot_nonstaples_GCAM_summary$coef[1,4]
Rsq_nonstaples_GCAM <- lm_to_plot_nonstaples_GCAM_summary$adj.r.squared
coef_nonstaples_GCAM <- lm_to_plot_nonstaples_GCAM_summary$coefficients[1]
plot_ly(comb_data_sel_cats_GCAM_reg_final) %>%
add_trace(y = ~energy,
x = ~nonstaples,
type = "scatter",
mode = "markers",
color = ~region) %>%
add_trace(x = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_2$nonstaples), length.out = 100),
y = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_2$nonstaples), length.out = 100)*coef_nonstaples_GCAM,
type = "scatter",
mode = "lines",
text = paste0("Slope: ", round(coef_nonstaples_GCAM, 2)),
name = "Linear regression") %>%
layout(xaxis = list(title = "PKcal from nonstaples"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs nonstaples consumption")
# also plot the version with the regional fixed effects
lm_to_plot_nonstaples_GCAM_reg <- lm(energy ~ 0 + nonstaples + region, comb_data_sel_cats_GCAM_reg_final)
lm_to_plot_nonstaples_GCAM_reg_summary <- summary(lm_to_plot_nonstaples_GCAM_reg)
print(lm_to_plot_nonstaples_GCAM_reg_summary)
##
## Call:
## lm(formula = energy ~ 0 + nonstaples + region, data = comb_data_sel_cats_GCAM_reg_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.65396 -0.01592 -0.00002 0.01969 0.33582
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## nonstaples 2.5314864 0.1189007 21.291 < 2e-16
## regionArgentina 0.0401654 0.1031191 0.390 0.697102
## regionAustralia_NZ 0.0806606 0.0204120 3.952 9.12e-05
## regionBrazil 0.3621971 0.0252941 14.319 < 2e-16
## regionCanada -0.0320123 0.0313586 -1.021 0.307922
## regionCentral Asia -0.0748663 0.0730905 -1.024 0.306292
## regionChina -0.1532522 0.0665663 -2.302 0.021815
## regionColombia -0.0113681 0.0204473 -0.556 0.578530
## regionEU-12 0.0029355 0.0225874 0.130 0.896659
## regionEU-15 0.0247097 0.0513317 0.481 0.630504
## regionEurope_Eastern 0.0034103 0.0300955 0.113 0.909836
## regionEurope_Non_EU -0.0742120 0.0213833 -3.471 0.000574
## regionEuropean Free Trade Association -0.0004722 0.0202714 -0.023 0.981427
## regionJapan 0.0429533 0.0218885 1.962 0.050387
## regionMexico -0.0756466 0.0217520 -3.478 0.000559
## regionRussia 0.1282448 0.0237003 5.411 1.06e-07
## regionSouth America_Northern -0.0288557 0.0207183 -1.393 0.164437
## regionSouth Korea -0.0041704 0.0205028 -0.203 0.838916
## regionSoutheast Asia -0.0038444 0.0263813 -0.146 0.884209
## regionTaiwan -0.0160890 0.0203103 -0.792 0.428722
## regionUSA 0.1510049 0.0445830 3.387 0.000774
##
## nonstaples ***
## regionArgentina
## regionAustralia_NZ ***
## regionBrazil ***
## regionCanada
## regionCentral Asia
## regionChina *
## regionColombia
## regionEU-12
## regionEU-15
## regionEurope_Eastern
## regionEurope_Non_EU ***
## regionEuropean Free Trade Association
## regionJapan .
## regionMexico ***
## regionRussia ***
## regionSouth America_Northern
## regionSouth Korea
## regionSoutheast Asia
## regionTaiwan
## regionUSA ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.103 on 415 degrees of freedom
## Multiple R-squared: 0.9637, Adjusted R-squared: 0.9619
## F-statistic: 525 on 21 and 415 DF, p-value: < 2.2e-16
# get coefficients for each region
coefs_nonstaples_GCAM_reg <- data.frame(coef = unname(lm_to_plot_nonstaples_GCAM_reg$coefficients),
pval = unname(summary(lm_to_plot_nonstaples_GCAM_reg)$coef[,4]),
label = names(lm_to_plot_nonstaples_GCAM_reg$coefficients)) %>%
mutate(region = ifelse(grepl("region", label), substr(label, 7, nchar(label)), NA))
overall_slope_nonstaples <- (coefs_nonstaples_GCAM_reg %>% filter(is.na(region)))$coef[1]
coefs_nonstaples_GCAM_reg_sel <- coefs_nonstaples_GCAM_reg %>%
filter(!is.na(region)) %>%
dplyr::select(-label) %>%
rename(intercept = coef) %>%
# include a variable indicating the intercept only if the intercept is significant
mutate(intercept_if_sig = ifelse(pval <= max_pval, intercept, 0))
comb_data_sel_cats_GCAM_reg_final_3 <- comb_data_sel_cats_GCAM_reg_final %>%
left_join(coefs_nonstaples_GCAM_reg_sel)
plot_ly(comb_data_sel_cats_GCAM_reg_final_3) %>%
add_trace(y = ~energy,
x = ~nonstaples,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region) %>%
add_trace(x = ~nonstaples,
y = ~nonstaples*overall_slope_nonstaples + intercept,
color = ~region,
type = "scatter",
mode = "lines",
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(x = ~nonstaples,
y = ~nonstaples*overall_slope_nonstaples + intercept_if_sig,
color = ~region,
type = "scatter",
mode = "lines",
line = list(dash = "dash"),
text = "intercept if sig",
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(x = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_3$nonstaples), length.out = 100),
y = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_3$nonstaples), length.out = 100)*overall_slope_nonstaples,
type = "scatter",
mode = "lines",
color = I("darkgray"),
line = list(dash = "dash"),
name = "With regional fixed effects",
text = paste0("Slope: ", round(overall_slope_nonstaples, 2))) %>%
add_trace(x = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_3$nonstaples), length.out = 100),
y = seq(from = 0, to = max(comb_data_sel_cats_GCAM_reg_final_3$nonstaples), length.out = 100)*coef_nonstaples_GCAM,
type = "scatter",
mode = "lines",
color = I("black"),
line = list(dash = "dash"),
name = "Base linear regression",
text = paste0("Slope: ", round(coef_nonstaples_GCAM, 2))) %>%
layout(xaxis = list(title = "PKcal from nonstaples"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs nonstaples consumption")
Try looking at how using this relationship would result in food processing energy use values for the regions without good energy data. Just looking at 1990 onwards for now.
# get just regions and years without good data
comb_data_sel_cats_GCAM_reg_to_infill <- comb_data_all_cats_GCAM_reg %>%
anti_join(GCAM_reg_years_incl)
# just select 1990 onwards for now
comb_data_sel_cats_GCAM_reg_to_infill_post_1990 <- comb_data_sel_cats_GCAM_reg_to_infill %>%
filter(year >= 1990)
# plot the infilled food data for these regions/years alongside the original food data for the other regions/years
plot_ly(comb_data_sel_cats_GCAM_reg_final_2) %>%
add_trace(y = ~energy,
x = ~PKcal,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
marker = list(symbol = "cross"),
showlegend = FALSE) %>%
add_trace(x = ~PKcal,
y = ~PKcal*overall_slope + intercept,
color = ~region,
type = "scatter",
mode = "lines",
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990,
x = ~PKcal,
y = ~PKcal*overall_slope,
color = ~region,
type = "scatter",
mode = "markers",
legendgroup = ~region) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption with infilled data")
Try adding in an intercept, which is estimated based on the PKcal consumption in 2015, using either the calculated intercepts in the original model or using the linear relationship between the derived intercept and PKcal consumption in 2015 for regions that were not in the original model.
# first calculate the intercept for each region, where it hasn't already been estimated directly
intercepts_infill_regions <- comb_data_sel_cats_GCAM_reg_to_infill %>%
bind_rows(comb_data_sel_cats_GCAM_reg_final_2) %>%
filter(year == 2015) %>%
# include indication if the intercept was estimated or pulled directly from the fixed effects model
mutate(intercept_estimated = is.na(intercept),
intercept = ifelse(is.na(intercept), PKcal * intercept_PKcal_2015_slope, intercept),
intercept_w_GDP = ifelse(is.na(intercept_w_GDP), PKcal * intercept_w_GDP_PKcal_2015_slope + nonstaples_frac * intercept_w_GDP_nonstaples_frac_2015_slope, intercept_w_GDP)) %>%
dplyr::select(region, intercept, intercept_w_GDP, intercept_estimated)
comb_data_sel_cats_GCAM_reg_to_infill_post_1990_2 <- comb_data_sel_cats_GCAM_reg_to_infill_post_1990 %>%
left_join(intercepts_infill_regions)
plot_ly(comb_data_sel_cats_GCAM_reg_final_2) %>%
add_trace(y = ~energy,
x = ~PKcal,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
marker = list(symbol = "cross"),
showlegend = FALSE) %>%
add_trace(x = ~PKcal,
y = ~PKcal*overall_slope + intercept,
color = ~region,
type = "scatter",
mode = "lines",
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_2,
x = ~PKcal,
y = ~PKcal*overall_slope + intercept,
color = ~region,
type = "scatter",
mode = "markers",
legendgroup = ~region) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption with infilled data and calculated/estimated intercepts")
plot_ly(comb_data_sel_cats_GCAM_reg_final_2) %>%
add_trace(y = ~energy,
x = ~PKcal,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
marker = list(symbol = "cross"),
showlegend = FALSE) %>%
add_trace(x = ~PKcal,
y = ~PKcal*overall_slope + intercept,
color = ~region,
type = "scatter",
mode = "lines",
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_2,
x = ~PKcal,
y = ~PKcal*overall_slope + intercept,
color = ~region,
type = "scatter",
mode = "markers",
legendgroup = ~region) %>%
# also plot without the intercept
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_2,
x = ~PKcal,
y = ~PKcal*overall_slope,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "square"),
legendgroup = ~region,
showlegend = FALSE) %>%
# also plot with the coefficient derived without regional fixed effects
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_2,
x = ~PKcal,
y = ~PKcal*coef_pkcal_GCAM,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "triangle-up"),
legendgroup = ~region,
showlegend = FALSE) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption with infilled data and calculated/estimated intercepts\ncrosses = real data, circles = estimated with estimated intercepts,\nsquares = estimated with zero intercept, triangles = estimated with coefficient without fixed effects",
margin = list(t = 75))
This does lead to some negative values though - worth watching out for, and might not want to use exactly this way?
Compare the models.
# function to get relevant data from the linear models
get_lm_data <- function(lm_model) {
coefs_all <- data.frame(coef = unname(lm_model$coefficients),
pval = unname(summary(lm_model)$coef[,4]),
label = names(lm_model$coefficients)) %>%
mutate(region = ifelse(grepl("region", label), substr(label, 7, nchar(label)), NA))
coefs_input_vars <- coefs_all %>%
filter(is.na(region)) %>%
dplyr::select(-region) %>%
mutate(is_sig = pval <= max_pval)
regional_intercepts <- coefs_all %>%
filter(!is.na(region)) %>%
dplyr::select(-label) %>%
rename(intercept = coef) %>%
# include a variable indicating the intercept only if the intercept is significant
mutate(intercept_if_sig = ifelse(pval <= max_pval, intercept, 0))
return(list(coefs_input_vars, regional_intercepts))
}
# run the function on the models to test to get their data
m_PKcal_1 <- get_lm_data(lm(energy ~ 0 + PKcal, comb_data_sel_cats_GCAM_reg_final))
PKcal_slope_1 <- (m_PKcal_1[[1]] %>% filter(label == "PKcal"))$coef[1]
m_PKcal_2 <- get_lm_data(lm(energy ~ PKcal, comb_data_sel_cats_GCAM_reg_final))
PKcal_slope_2 <- (m_PKcal_2[[1]] %>% filter(label == "PKcal"))$coef[1]
overall_intercept_PKcal_2 <- (m_PKcal_2[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_PKcal_sig_2 <- (m_PKcal_2[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
m_PKcal_3 <- get_lm_data(lm(energy ~ 0 + PKcal + region, comb_data_sel_cats_GCAM_reg_final))
PKcal_slope_3 <- (m_PKcal_3[[1]] %>% filter(label == "PKcal"))$coef[1]
region_intercepts_PKcal_3 <- m_PKcal_3[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_PKcal_3", recycle0 = TRUE), contains("intercept"))
m_PKcal_4 <- get_lm_data(lm(energy ~ PKcal + region, comb_data_sel_cats_GCAM_reg_final))
PKcal_slope_4 <- (m_PKcal_4[[1]] %>% filter(label == "PKcal"))$coef[1]
overall_intercept_PKcal_4 <- (m_PKcal_4[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_PKcal_sig_4 <- (m_PKcal_4[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
region_intercepts_PKcal_4 <- m_PKcal_4[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_PKcal_4", recycle0 = TRUE), contains("intercept"))
m_nonstaples_1 <- get_lm_data(lm(energy ~ 0 + nonstaples, comb_data_sel_cats_GCAM_reg_final))
nonstaples_slope_1 <- (m_nonstaples_1[[1]] %>% filter(label == "nonstaples"))$coef[1]
m_nonstaples_2 <- get_lm_data(lm(energy ~ nonstaples, comb_data_sel_cats_GCAM_reg_final))
nonstaples_slope_2 <- (m_nonstaples_2[[1]] %>% filter(label == "nonstaples"))$coef[1]
overall_intercept_nonstaples_2 <- (m_nonstaples_2[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_nonstaples_sig_2 <- (m_nonstaples_2[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
m_nonstaples_3 <- get_lm_data(lm(energy ~ 0 + nonstaples + region, comb_data_sel_cats_GCAM_reg_final))
nonstaples_slope_3 <- (m_nonstaples_3[[1]] %>% filter(label == "nonstaples"))$coef[1]
region_intercepts_nonstaples_3 <- m_nonstaples_3[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_nonstaples_3", recycle0 = TRUE), contains("intercept"))
m_nonstaples_4 <- get_lm_data(lm(energy ~ nonstaples + region, comb_data_sel_cats_GCAM_reg_final))
nonstaples_slope_4 <- (m_nonstaples_4[[1]] %>% filter(label == "nonstaples"))$coef[1]
overall_intercept_nonstaples_4 <- (m_nonstaples_4[[1]] %>% filter(label == "(Intercept)"))$coef[1]
overall_intercept_nonstaples_sig_4 <- (m_nonstaples_4[[1]] %>% filter(label == "(Intercept)"))$is_sig[1]
region_intercepts_nonstaples_4 <- m_nonstaples_4[[2]] %>% dplyr::select(region, intercept, intercept_if_sig) %>%
rename_with(~paste0(.x, "_nonstaples_4", recycle0 = TRUE), contains("intercept"))
m_nonstaples_staples_1 <- get_lm_data(lm(energy ~ 0 + nonstaples + staples, comb_data_sel_cats_GCAM_reg_final))
nonstaples_staples_ns_slope_1 <- (m_nonstaples_staples_1[[1]] %>% filter(label == "nonstaples"))$coef[1]
nonstaples_staples_s_slope_1 <- (m_nonstaples_staples_1[[1]] %>% filter(label == "staples"))$coef[1]
m_nonstaples_staples_2 <- get_lm_data(lm(energy ~ nonstaples + staples, comb_data_sel_cats_GCAM_reg_final))
nonstaples_staples_ns_slope_2 <- (m_nonstaples_staples_2[[1]] %>% filter(label == "nonstaples"))$coef[1]
nonstaples_staples_s_slope_2 <- (m_nonstaples_staples_2[[1]] %>% filter(label == "staples"))$coef[1]
overall_intercept_nonstaples_staples_4 <- (m_nonstaples_staples_2[[1]] %>% filter(label == "(Intercept)"))$coef[1]
# try infilling data based on each of these relationships for the regions without data
comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods <- comb_data_sel_cats_GCAM_reg_to_infill_post_1990 %>%
left_join(region_intercepts_PKcal_3) %>%
left_join(region_intercepts_PKcal_4) %>%
left_join(region_intercepts_nonstaples_3) %>%
left_join(region_intercepts_nonstaples_4) %>%
mutate(across(contains("intercept"), ~ replace_na(.x, 0))) %>%
mutate(energy_est_PKcal_1 = PKcal * PKcal_slope_1,
energy_est_PKcal_2 = PKcal * PKcal_slope_2 + overall_intercept_PKcal_2,
energy_est_PKcal_3 = PKcal * PKcal_slope_3 + intercept_PKcal_3,
energy_est_PKcal_4 = PKcal * PKcal_slope_4 + overall_intercept_PKcal_4 + intercept_PKcal_4,
energy_est_nonstaples_1 = nonstaples * nonstaples_slope_1,
energy_est_nonstaples_2 = nonstaples * nonstaples_slope_2 + overall_intercept_nonstaples_2,
energy_est_nonstaples_3 = nonstaples * nonstaples_slope_3 + intercept_nonstaples_3,
energy_est_nonstaples_4 = nonstaples * nonstaples_slope_4 + overall_intercept_nonstaples_4 + intercept_nonstaples_4,
energy_est_nonstaples_staples_1 = nonstaples * nonstaples_staples_ns_slope_1 + staples * nonstaples_staples_s_slope_1,
energy_est_nonstaples_staples_2 = nonstaples * nonstaples_staples_ns_slope_2 + staples * nonstaples_staples_s_slope_2)
plot_ly() %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_final %>% filter(year >= 1990),
x = ~PKcal,
y = ~energy,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "circle"),
text = ~paste0("Historical ", year),
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_PKcal_1,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "square"),
text = ~paste0("No intercept ", year),
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_PKcal_2,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "triangle-up"),
text = ~paste0("With intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_PKcal_3,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "cross"),
text = ~paste0("Fixed effects, no intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_PKcal_4,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "diamond"),
text = ~paste0("Fixed effects, with intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "Energy"),
title = "Food processing energy use vs calorie consumption,\nhistorical and infilled with different methods")
plot_ly() %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_final %>% filter(year >= 1990),
x = ~nonstaples,
y = ~energy,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "circle"),
text = ~paste0("Historical ", year),
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~nonstaples,
y = ~energy_est_nonstaples_1,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "square"),
text = ~paste0("No intercept ", year),
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~nonstaples,
y = ~energy_est_nonstaples_2,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "triangle-up"),
text = ~paste0("With intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~nonstaples,
y = ~energy_est_nonstaples_3,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "cross"),
text = ~paste0("Fixed effects, no intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~nonstaples,
y = ~energy_est_nonstaples_4,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "diamond"),
text = ~paste0("Fixed effects, with intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
layout(xaxis = list(title = "PKcal from nonstaples"),
yaxis = list(title = "Energy"),
title = "Food processing energy use vs nonstaples calorie consumption,\nhistorical and infilled with different methods")
# plot all together
plot_ly() %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_final %>% filter(year >= 1990),
x = ~PKcal,
y = ~energy,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "circle"),
text = ~paste0("Historical ", year),
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_PKcal_1,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "square"),
text = ~paste0("No intercept ", year),
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_PKcal_2,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "triangle-up"),
text = ~paste0("With intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_PKcal_3,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "cross"),
text = ~paste0("Fixed effects, no intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_PKcal_4,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "diamond"),
text = ~paste0("Fixed effects, with intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_nonstaples_1,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "square", line = list(color = "black", width = 2)),
text = ~paste0("Nonstaples, no intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_nonstaples_2,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "triangle-up", line = list(color = "black", width = 2)),
text = ~paste0("Nonstaples, with intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_nonstaples_3,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "cross", line = list(color = "black", width = 2)),
text = ~paste0("Nonstaples, fixed effects, no intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_nonstaples_4,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "diamond", line = list(color = "black", width = 2)),
text = ~paste0("Nonstaples, fixed effects, with intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_nonstaples_staples_1,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "square", line = list(color = "gray", width = 2)),
text = ~paste0("Nonstaples + staples, no intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
add_trace(data = comb_data_sel_cats_GCAM_reg_to_infill_post_1990_test_methods,
x = ~PKcal,
y = ~energy_est_nonstaples_staples_2,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "triangle-up", line = list(color = "gray", width = 2)),
text = ~paste0("Nonstaples + staples, with intercept ", year),
legendgroup = ~region,
showlegend = FALSE) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "Energy"),
title = "Food processing energy use vs calorie consumption,\nhistorical and infilled with different methods")
#comb_data_sel_cats_GCAM_reg_final
#comb_data_sel_cats_GCAM_reg_to_infill_post_1990
First we will use the relationship derived above to calculate the coefficients using the calorie consumption data that is actually used in GCAM - these data use the global average values for macronutrient rate to infill for regions that are missing those data, as well as exclude NEC (not otherwise classified) calorie data from the total food calories.
# aggregate the gcamdata output into total calories - it is broken down by type right now
total_cal_food_gcamdata_GCAM_reg <- L101.CropMeat_Food_Pcal_R_C_Y %>%
group_by(GCAM_region_ID, year) %>%
summarize(PKcal = sum(value) / 1000) %>%
ungroup()
# combine with energy and GDP data on a region level
comb_data_sel_GCAM_reg_gcamdata <- IEA_foodpro_region_total_en %>%
group_by(GCAM_region_ID, year) %>%
summarize(EJ = sum(value)) %>%
ungroup() %>%
full_join(total_cal_food_gcamdata_GCAM_reg) %>%
full_join(GDP_per_cap_hist_calc_w_FAO %>%
mutate(GDP_thous2015USD_FAO = GDP_pcap_thous2015USD_FAO * pop) %>%
group_by(GCAM_region_ID, year) %>%
summarize(GDP_thous2015USD_FAO = sum(GDP_thous2015USD_FAO),
pop = sum(pop)) %>%
ungroup() %>%
mutate(GDP_pcap_thous2015USD_FAO = GDP_thous2015USD_FAO / pop)) %>%
filter(year %in% all_IEA_years) %>%
left_join(GCAM_region_names)
comb_data_sel_GCAM_reg_gcamdata_to_infill <- comb_data_sel_GCAM_reg_gcamdata %>%
anti_join(GCAM_reg_years_incl)
# just select 1990 onwards for now
comb_data_sel_GCAM_reg_gcamdata_to_infill_post_1990 <- comb_data_sel_GCAM_reg_gcamdata_to_infill %>%
filter(year >= 1990) %>%
left_join(intercepts_infill_regions)
plot_ly(comb_data_sel_cats_GCAM_reg_final_2) %>%
add_trace(y = ~energy,
x = ~PKcal,
type = "scatter",
mode = "markers",
color = ~region,
legendgroup = ~region,
marker = list(symbol = "cross"),
showlegend = FALSE) %>%
add_trace(x = ~PKcal,
y = ~PKcal*overall_slope + intercept,
color = ~region,
type = "scatter",
mode = "lines",
legendgroup = ~region) %>%
add_trace(data = comb_data_sel_GCAM_reg_gcamdata_to_infill_post_1990,
x = ~PKcal,
y = ~PKcal*overall_slope + intercept,
color = ~region,
type = "scatter",
mode = "markers",
legendgroup = ~region) %>%
# also plot without the intercept
add_trace(data = comb_data_sel_GCAM_reg_gcamdata_to_infill_post_1990,
x = ~PKcal,
y = ~PKcal*overall_slope,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "square"),
legendgroup = ~region,
showlegend = FALSE) %>%
# also plot with the coefficient derived without regional fixed effects
add_trace(data = comb_data_sel_GCAM_reg_gcamdata_to_infill_post_1990,
x = ~PKcal,
y = ~PKcal*coef_pkcal_GCAM,
color = ~region,
type = "scatter",
mode = "markers",
marker = list(symbol = "triangle-up"),
legendgroup = ~region,
showlegend = FALSE) %>%
layout(xaxis = list(title = "PKcal"),
yaxis = list(title = "EJ"),
title = "Food processing energy use vs food consumption with infilled data and calculated/estimated intercepts\ncrosses = real data, circles = estimated with estimated intercepts,\nsquares = estimated with zero intercept, triangles = estimated with coefficient without fixed effects\nfor gcamdata input for regions to estimate",
margin = list(t = 75))
Now try calculating the relationship between food consumption and food processing energy use using these data, instead of the more “raw” calorie data.
# combine with energy and GDP data on a region level
comb_data_sel_GCAM_reg_gcamdata <- IEA_foodpro_region_total_en %>%
group_by(GCAM_region_ID, year) %>%
summarize(EJ = sum(value)) %>%
ungroup() %>%
full_join(total_cal_food_gcamdata_GCAM_reg) %>%
full_join(GDP_per_cap_hist_calc_w_FAO %>%
mutate(GDP_thous2015USD_FAO = GDP_pcap_thous2015USD_FAO * pop) %>%
group_by(GCAM_region_ID, year) %>%
summarize(GDP_thous2015USD_FAO = sum(GDP_thous2015USD_FAO),
pop = sum(pop)) %>%
ungroup() %>%
mutate(GDP_pcap_thous2015USD_FAO = GDP_thous2015USD_FAO / pop)) %>%
filter(year %in% all_IEA_years) %>%
left_join(GCAM_region_names)
# filter to just the regions with good data
comb_data_sel_GCAM_reg_gcamdata_final <- comb_data_sel_GCAM_reg_gcamdata %>%
right_join(GCAM_reg_years_incl) %>%
filter(!is.na(PKcal),
PKcal > 0)
# print output of linear models
summary(lm(EJ ~ 0 + PKcal, comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ 0 + PKcal, data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.83401 -0.01201 0.01547 0.12236 0.83721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.20918 0.03442 35.13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2698 on 435 degrees of freedom
## Multiple R-squared: 0.7394, Adjusted R-squared: 0.7388
## F-statistic: 1234 on 1 and 435 DF, p-value: < 2.2e-16
summary(lm(EJ ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO), comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO),
## data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63971 -0.14364 -0.07643 0.03685 0.68541
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 1.020282 0.033811 30.18 <2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.056166 0.004863 11.55 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2389 on 377 degrees of freedom
## (57 observations deleted due to missingness)
## Multiple R-squared: 0.8065, Adjusted R-squared: 0.8055
## F-statistic: 785.7 on 2 and 377 DF, p-value: < 2.2e-16
summary(lm(EJ ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) + region, comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) +
## region, data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63087 -0.02348 0.00170 0.02374 0.28981
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 2.15455 0.18832 11.441 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) 0.03133 0.03978 0.788 0.431381
## regionArgentina -0.06927 0.14838 -0.467 0.640881
## regionAustralia_NZ -0.02895 0.14573 -0.199 0.842662
## regionBrazil 0.22696 0.05954 3.812 0.000163 ***
## regionCanada -0.14606 0.14629 -0.998 0.318754
## regionCentral Asia -0.20291 0.09095 -2.231 0.026296 *
## regionChina -1.72832 0.22121 -7.813 6.27e-14 ***
## regionColombia -0.06764 0.05858 -1.155 0.248956
## regionEU-12 -0.10836 0.07382 -1.468 0.142986
## regionEU-15 -0.06250 0.09334 -0.670 0.503570
## regionEurope_Eastern -0.07064 0.04421 -1.598 0.110968
## regionEurope_Non_EU -0.21266 0.07686 -2.767 0.005955 **
## regionEuropean Free Trade Association -0.13263 0.16893 -0.785 0.432888
## regionJapan -0.13202 0.12272 -1.076 0.282778
## regionMexico -0.20728 0.07311 -2.835 0.004840 **
## regionRussia -0.02386 0.06038 -0.395 0.692915
## regionSouth America_Northern -0.10792 0.09397 -1.148 0.251551
## regionSouth Korea -0.12559 0.11006 -1.141 0.254597
## regionSoutheast Asia -0.35431 0.04187 -8.463 6.79e-16 ***
## regionUSA 0.10194 0.10979 0.929 0.353757
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1092 on 358 degrees of freedom
## (57 observations deleted due to missingness)
## Multiple R-squared: 0.9616, Adjusted R-squared: 0.9593
## F-statistic: 426.5 on 21 and 358 DF, p-value: < 2.2e-16
summary(lm(EJ ~ 0 + PKcal + region, comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ 0 + PKcal + region, data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.63328 -0.01452 0.00111 0.02238 0.29543
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 2.2590434 0.1040050 21.721 < 2e-16
## regionArgentina 0.0104071 0.1020965 0.102 0.9189
## regionAustralia_NZ 0.0849380 0.0201679 4.212 3.11e-05
## regionBrazil 0.2702093 0.0276846 9.760 < 2e-16
## regionCanada -0.0332550 0.0310324 -1.072 0.2845
## regionCentral Asia -0.1646734 0.0727576 -2.263 0.0241
## regionChina -1.8350921 0.1410253 -13.012 < 2e-16
## regionColombia -0.0243059 0.0203270 -1.196 0.2325
## regionEU-12 -0.0504720 0.0235024 -2.148 0.0323
## regionEU-15 0.0161341 0.0507530 0.318 0.7507
## regionEurope_Eastern -0.0459866 0.0301992 -1.523 0.1286
## regionEurope_Non_EU -0.1692711 0.0229329 -7.381 8.65e-13
## regionEuropean Free Trade Association -0.0004314 0.0200577 -0.022 0.9829
## regionJapan -0.0357243 0.0232517 -1.536 0.1252
## regionMexico -0.1515020 0.0230081 -6.585 1.38e-10
## regionRussia 0.0151043 0.0262003 0.576 0.5646
## regionSouth America_Northern -0.0356232 0.0205331 -1.735 0.0835
## regionSouth Korea -0.0399617 0.0206218 -1.938 0.0533
## regionSoutheast Asia -0.3526609 0.0376423 -9.369 < 2e-16
## regionTaiwan -0.0248699 0.0201395 -1.235 0.2176
## regionUSA 0.1857237 0.0423708 4.383 1.48e-05
##
## PKcal ***
## regionArgentina
## regionAustralia_NZ ***
## regionBrazil ***
## regionCanada
## regionCentral Asia *
## regionChina ***
## regionColombia
## regionEU-12 *
## regionEU-15
## regionEurope_Eastern
## regionEurope_Non_EU ***
## regionEuropean Free Trade Association
## regionJapan
## regionMexico ***
## regionRussia
## regionSouth America_Northern .
## regionSouth Korea .
## regionSoutheast Asia ***
## regionTaiwan
## regionUSA ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.102 on 415 degrees of freedom
## Multiple R-squared: 0.9645, Adjusted R-squared: 0.9627
## F-statistic: 536.6 on 21 and 415 DF, p-value: < 2.2e-16
summary(lm(EJ ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) + region + year, comb_data_sel_GCAM_reg_gcamdata_final))
##
## Call:
## lm(formula = EJ ~ 0 + PKcal + log(GDP_pcap_thous2015USD_FAO) +
## region + year, data = comb_data_sel_GCAM_reg_gcamdata_final)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.61294 -0.02548 0.00015 0.02974 0.30840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## PKcal 2.352531 0.199173 11.811 < 2e-16 ***
## log(GDP_pcap_thous2015USD_FAO) -0.084137 0.056701 -1.484 0.138722
## regionArgentina -7.164012 2.509868 -2.854 0.004564 **
## regionAustralia_NZ -6.956210 2.450671 -2.838 0.004792 **
## regionBrazil -6.930851 2.528523 -2.741 0.006432 **
## regionCanada -7.101961 2.460800 -2.886 0.004138 **
## regionCentral Asia -7.440188 2.557488 -2.909 0.003851 **
## regionChina -9.222314 2.655616 -3.473 0.000578 ***
## regionColombia -7.253025 2.538237 -2.858 0.004520 **
## regionEU-12 -7.237000 2.518595 -2.873 0.004303 **
## regionEU-15 -7.110990 2.490946 -2.855 0.004559 **
## regionEurope_Eastern -7.344996 2.569370 -2.859 0.004504 **
## regionEurope_Non_EU -7.369962 2.528803 -2.914 0.003788 **
## regionEuropean Free Trade Association -6.994317 2.429026 -2.879 0.004224 **
## regionJapan -7.107474 2.466434 -2.882 0.004195 **
## regionMexico -7.332914 2.517516 -2.913 0.003808 **
## regionRussia -7.189892 2.531445 -2.840 0.004766 **
## regionSouth America_Northern -7.187314 2.501876 -2.873 0.004312 **
## regionSouth Korea -7.152624 2.484044 -2.879 0.004224 **
## regionSoutheast Asia -7.653666 2.578160 -2.969 0.003193 **
## regionUSA -6.873756 2.465920 -2.788 0.005596 **
## year 0.003671 0.001297 2.832 0.004894 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1082 on 357 degrees of freedom
## (57 observations deleted due to missingness)
## Multiple R-squared: 0.9624, Adjusted R-squared: 0.9601
## F-statistic: 415.5 on 22 and 357 DF, p-value: < 2.2e-16
These numbers are relatively similar to the values calculated from the more “raw,” less processed data. They do have differences, but overall it is generally good validation that the methods would be similar, we will still use the less processed data.